aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/9p.c314
-rw-r--r--fs/9p/9p.h77
-rw-r--r--fs/9p/Makefile11
-rw-r--r--fs/9p/conv.c906
-rw-r--r--fs/9p/conv.h35
-rw-r--r--fs/9p/debug.h23
-rw-r--r--fs/9p/error.c10
-rw-r--r--fs/9p/error.h2
-rw-r--r--fs/9p/fid.c5
-rw-r--r--fs/9p/mux.c1145
-rw-r--r--fs/9p/mux.h41
-rw-r--r--fs/9p/trans_fd.c53
-rw-r--r--fs/9p/trans_sock.c160
-rw-r--r--fs/9p/transport.h4
-rw-r--r--fs/9p/v9fs.c59
-rw-r--r--fs/9p/v9fs.h17
-rw-r--r--fs/9p/v9fs_vfs.h6
-rw-r--r--fs/9p/vfs_addr.c109
-rw-r--r--fs/9p/vfs_dentry.c15
-rw-r--r--fs/9p/vfs_dir.c47
-rw-r--r--fs/9p/vfs_file.c32
-rw-r--r--fs/9p/vfs_inode.c620
-rw-r--r--fs/9p/vfs_super.c14
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/Makefile4
-rw-r--r--fs/affs/inode.c4
-rw-r--r--fs/afs/cmservice.c2
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/volume.h4
-rw-r--r--fs/aio.c3
-rw-r--r--fs/attr.c26
-rw-r--r--fs/autofs/root.c5
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/expire.c12
-rw-r--r--fs/autofs4/inode.c4
-rw-r--r--fs/autofs4/root.c28
-rw-r--r--fs/binfmt_aout.c2
-rw-r--r--fs/binfmt_elf.c30
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/binfmt_flat.c19
-rw-r--r--fs/binfmt_misc.c14
-rw-r--r--fs/bio.c5
-rw-r--r--fs/block_dev.c4
-rw-r--r--fs/buffer.c91
-rw-r--r--fs/char_dev.c96
-rw-r--r--fs/cifs/CHANGES10
-rw-r--r--fs/cifs/README12
-rw-r--r--fs/cifs/cifs_debug.c51
-rw-r--r--fs/cifs/cifs_fs_sb.h5
-rw-r--r--fs/cifs/cifs_uniupr.h2
-rw-r--r--fs/cifs/cifsacl.h38
-rw-r--r--fs/cifs/cifsencrypt.c55
-rw-r--r--fs/cifs/cifsfs.c34
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h18
-rw-r--r--fs/cifs/cifspdu.h97
-rw-r--r--fs/cifs/cifsproto.h23
-rw-r--r--fs/cifs/cifssmb.c294
-rw-r--r--fs/cifs/connect.c89
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/file.c79
-rw-r--r--fs/cifs/inode.c20
-rw-r--r--fs/cifs/misc.c17
-rw-r--r--fs/cifs/readdir.c17
-rw-r--r--fs/cifs/rfc1002pdu.h4
-rw-r--r--fs/cifs/transport.c40
-rw-r--r--fs/cifs/xattr.c22
-rw-r--r--fs/coda/cache.c2
-rw-r--r--fs/coda/dir.c4
-rw-r--r--fs/coda/file.c8
-rw-r--r--fs/compat.c334
-rw-r--r--fs/compat_ioctl.c395
-rw-r--r--fs/configfs/dir.c54
-rw-r--r--fs/configfs/file.c4
-rw-r--r--fs/configfs/inode.c6
-rw-r--r--fs/dcache.c43
-rw-r--r--fs/dcookies.c1
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/devfs/base.c22
-rw-r--r--fs/devpts/inode.c8
-rw-r--r--fs/direct-io.c30
-rw-r--r--fs/dquot.c17
-rw-r--r--fs/drop_caches.c68
-rw-r--r--fs/efs/super.c5
-rw-r--r--fs/exec.c22
-rw-r--r--fs/exportfs/expfs.c91
-rw-r--r--fs/ext2/acl.c11
-rw-r--r--fs/ext2/balloc.c1
-rw-r--r--fs/ext2/bitmap.c7
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/ioctl.c1
-rw-r--r--fs/ext2/namei.c5
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext2/xattr.c6
-rw-r--r--fs/ext2/xattr_trusted.c5
-rw-r--r--fs/ext2/xattr_user.c14
-rw-r--r--fs/ext3/acl.c11
-rw-r--r--fs/ext3/balloc.c3
-rw-r--r--fs/ext3/bitmap.c8
-rw-r--r--fs/ext3/bitmap.h8
-rw-r--r--fs/ext3/ialloc.c7
-rw-r--r--fs/ext3/ioctl.c1
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/resize.c32
-rw-r--r--fs/ext3/super.c60
-rw-r--r--fs/ext3/xattr.c6
-rw-r--r--fs/ext3/xattr_trusted.c5
-rw-r--r--fs/ext3/xattr_user.c15
-rw-r--r--fs/fat/cache.c14
-rw-r--r--fs/fat/dir.c28
-rw-r--r--fs/fat/fatent.c10
-rw-r--r--fs/fat/file.c38
-rw-r--r--fs/fat/inode.c119
-rw-r--r--fs/fat/misc.c8
-rw-r--r--fs/fcntl.c9
-rw-r--r--fs/fifo.c6
-rw-r--r--fs/file_table.c9
-rw-r--r--fs/freevxfs/vxfs_immed.c4
-rw-r--r--fs/fuse/dev.c225
-rw-r--r--fs/fuse/dir.c18
-rw-r--r--fs/fuse/file.c87
-rw-r--r--fs/fuse/fuse_i.h98
-rw-r--r--fs/fuse/inode.c268
-rw-r--r--fs/hfs/bfind.c3
-rw-r--r--fs/hfs/bnode.c6
-rw-r--r--fs/hfs/brec.c2
-rw-r--r--fs/hfs/btree.c10
-rw-r--r--fs/hfs/catalog.c2
-rw-r--r--fs/hfs/dir.c12
-rw-r--r--fs/hfs/hfs_fs.h3
-rw-r--r--fs/hfs/inode.c5
-rw-r--r--fs/hfs/mdb.c22
-rw-r--r--fs/hfs/super.c40
-rw-r--r--fs/hfsplus/bfind.c3
-rw-r--r--fs/hfsplus/bitmap.c8
-rw-r--r--fs/hfsplus/bnode.c11
-rw-r--r--fs/hfsplus/brec.c2
-rw-r--r--fs/hfsplus/btree.c37
-rw-r--r--fs/hfsplus/catalog.c44
-rw-r--r--fs/hfsplus/dir.c47
-rw-r--r--fs/hfsplus/extents.c6
-rw-r--r--fs/hfsplus/hfsplus_fs.h15
-rw-r--r--fs/hfsplus/hfsplus_raw.h13
-rw-r--r--fs/hfsplus/inode.c25
-rw-r--r--fs/hfsplus/ioctl.c1
-rw-r--r--fs/hfsplus/options.c18
-rw-r--r--fs/hfsplus/super.c73
-rw-r--r--fs/hfsplus/unicode.c30
-rw-r--r--fs/hfsplus/wrapper.c17
-rw-r--r--fs/hpfs/dir.c6
-rw-r--r--fs/hppfs/hppfs_kern.c6
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/inode.c57
-rw-r--r--fs/inotify.c1
-rw-r--r--fs/ioctl.c1
-rw-r--r--fs/ioprio.c1
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/jbd/checkpoint.c2
-rw-r--r--fs/jbd/commit.c3
-rw-r--r--fs/jffs/inode-v23.c6
-rw-r--r--fs/jffs2/build.c2
-rw-r--r--fs/jffs2/fs.c1
-rw-r--r--fs/jffs2/nodelist.c4
-rw-r--r--fs/jfs/jfs_dmap.c3
-rw-r--r--fs/jfs/jfs_imap.c6
-rw-r--r--fs/jfs/jfs_incore.h4
-rw-r--r--fs/jfs/jfs_txnmgr.c6
-rw-r--r--fs/jfs/jfs_umount.c6
-rw-r--r--fs/jfs/resize.c3
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/jfs/xattr.c67
-rw-r--r--fs/libfs.c20
-rw-r--r--fs/lockd/xdr.c6
-rw-r--r--fs/locks.c7
-rw-r--r--fs/mbcache.c6
-rw-r--r--fs/mpage.c4
-rw-r--r--fs/namei.c271
-rw-r--r--fs/namespace.c36
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/ncpfs/file.c2
-rw-r--r--fs/ncpfs/inode.c19
-rw-r--r--fs/ncpfs/ioctl.c21
-rw-r--r--fs/ncpfs/ncplib_kernel.h4
-rw-r--r--fs/nfs/dir.c10
-rw-r--r--fs/nfs/inode.c25
-rw-r--r--fs/nfs/nfsroot.c4
-rw-r--r--fs/nfsctl.c1
-rw-r--r--fs/nfsd/nfs4proc.c55
-rw-r--r--fs/nfsd/nfs4recover.c30
-rw-r--r--fs/nfsd/nfs4state.c220
-rw-r--r--fs/nfsd/nfs4xdr.c10
-rw-r--r--fs/nfsd/nfsproc.c37
-rw-r--r--fs/nfsd/nfsxdr.c4
-rw-r--r--fs/nfsd/vfs.c222
-rw-r--r--fs/ntfs/attrib.c4
-rw-r--r--fs/ntfs/dir.c8
-rw-r--r--fs/ntfs/file.c20
-rw-r--r--fs/ntfs/index.c6
-rw-r--r--fs/ntfs/inode.c28
-rw-r--r--fs/ntfs/namei.c6
-rw-r--r--fs/ntfs/quota.c6
-rw-r--r--fs/ntfs/super.c44
-rw-r--r--fs/ocfs2/alloc.c24
-rw-r--r--fs/ocfs2/cluster/masklog.h7
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/ocfs2/inode.c12
-rw-r--r--fs/ocfs2/journal.c14
-rw-r--r--fs/ocfs2/localalloc.c6
-rw-r--r--fs/ocfs2/mmap.c8
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/open.c123
-rw-r--r--fs/partitions/Kconfig27
-rw-r--r--fs/partitions/Makefile1
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/partitions/karma.c57
-rw-r--r--fs/partitions/karma.h8
-rw-r--r--fs/pipe.c50
-rw-r--r--fs/pnode.c2
-rw-r--r--fs/proc/array.c6
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/kcore.c1
-rw-r--r--fs/proc/proc_devtree.c24
-rw-r--r--fs/proc/proc_misc.c164
-rw-r--r--fs/proc/root.c3
-rw-r--r--fs/proc/task_mmu.c127
-rw-r--r--fs/proc/vmcore.c6
-rw-r--r--fs/quota.c7
-rw-r--r--fs/quota_v2.c3
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/readdir.c4
-rw-r--r--fs/reiserfs/file.c12
-rw-r--r--fs/reiserfs/inode.c14
-rw-r--r--fs/reiserfs/ioctl.c5
-rw-r--r--fs/reiserfs/journal.c5
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/reiserfs/tail_conversion.c2
-rw-r--r--fs/reiserfs/xattr.c56
-rw-r--r--fs/reiserfs/xattr_acl.c7
-rw-r--r--fs/reiserfs/xattr_trusted.c1
-rw-r--r--fs/reiserfs/xattr_user.c30
-rw-r--r--fs/relayfs/buffers.c3
-rw-r--r--fs/relayfs/inode.c226
-rw-r--r--fs/relayfs/relay.c69
-rw-r--r--fs/relayfs/relay.h4
-rw-r--r--fs/romfs/inode.c6
-rw-r--r--fs/select.c348
-rw-r--r--fs/smbfs/Makefile1
-rw-r--r--fs/smbfs/cache.c4
-rw-r--r--fs/smbfs/file.c7
-rw-r--r--fs/smbfs/inode.c35
-rw-r--r--fs/smbfs/proc.c3
-rw-r--r--fs/smbfs/request.c13
-rw-r--r--fs/stat.c66
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysfs/dir.c31
-rw-r--r--fs/sysfs/file.c17
-rw-r--r--fs/sysfs/inode.c9
-rw-r--r--fs/sysfs/symlink.c5
-rw-r--r--fs/sysv/ChangeLog2
-rw-r--r--fs/sysv/dir.c4
-rw-r--r--fs/udf/balloc.c2
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/inode.c5
-rw-r--r--fs/ufs/balloc.c20
-rw-r--r--fs/ufs/ialloc.c4
-rw-r--r--fs/ufs/inode.c11
-rw-r--r--fs/ufs/super.c65
-rw-r--r--fs/ufs/util.h28
-rw-r--r--fs/xattr.c199
-rw-r--r--fs/xfs/Kbuild6
-rw-r--r--fs/xfs/linux-2.6/mutex.h18
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1111
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h10
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c1373
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h696
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c137
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h5
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c76
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h18
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h19
-rw-r--r--fs/xfs/quota/xfs_dquot.c4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c4
-rw-r--r--fs/xfs/quota/xfs_qm.c142
-rw-r--r--fs/xfs/quota/xfs_qm.h2
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c11
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h2
-rw-r--r--fs/xfs/support/debug.c60
-rw-r--r--fs/xfs/support/debug.h25
-rw-r--r--fs/xfs/support/uuid.c29
-rw-r--r--fs/xfs/xfs_acl.c1
-rw-r--r--fs/xfs/xfs_arch.h22
-rw-r--r--fs/xfs/xfs_attr.c32
-rw-r--r--fs/xfs/xfs_attr_leaf.c12
-rw-r--r--fs/xfs/xfs_attr_leaf.h79
-rw-r--r--fs/xfs/xfs_bmap.c412
-rw-r--r--fs/xfs/xfs_bmap.h7
-rw-r--r--fs/xfs/xfs_clnt.h2
-rw-r--r--fs/xfs/xfs_dfrag.c16
-rw-r--r--fs/xfs/xfs_dinode.h22
-rw-r--r--fs/xfs/xfs_dir.c2
-rw-r--r--fs/xfs/xfs_dir.h2
-rw-r--r--fs/xfs/xfs_dir2.h3
-rw-r--r--fs/xfs/xfs_dir_leaf.c96
-rw-r--r--fs/xfs/xfs_dir_leaf.h64
-rw-r--r--fs/xfs/xfs_dmapi.h14
-rw-r--r--fs/xfs/xfs_error.c1
-rw-r--r--fs/xfs/xfs_error.h8
-rw-r--r--fs/xfs/xfs_fs.h10
-rw-r--r--fs/xfs/xfs_fsops.c30
-rw-r--r--fs/xfs/xfs_fsops.h1
-rw-r--r--fs/xfs/xfs_iget.c5
-rw-r--r--fs/xfs/xfs_inode.c61
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_item.c9
-rw-r--r--fs/xfs/xfs_iomap.c426
-rw-r--r--fs/xfs/xfs_itable.c5
-rw-r--r--fs/xfs/xfs_log.c175
-rw-r--r--fs/xfs/xfs_log.h19
-rw-r--r--fs/xfs/xfs_log_priv.h77
-rw-r--r--fs/xfs/xfs_log_recover.c12
-rw-r--r--fs/xfs/xfs_mount.c73
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_rename.c7
-rw-r--r--fs/xfs/xfs_rw.c9
-rw-r--r--fs/xfs/xfs_sb.h17
-rw-r--r--fs/xfs/xfs_trans.c14
-rw-r--r--fs/xfs/xfs_trans.h1
-rw-r--r--fs/xfs/xfs_trans_item.c22
-rw-r--r--fs/xfs/xfs_utils.c9
-rw-r--r--fs/xfs/xfs_vfsops.c50
-rw-r--r--fs/xfs/xfs_vnodeops.c255
349 files changed, 9845 insertions, 7189 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
index e847f504a47c..1a6d08761f39 100644
--- a/fs/9p/9p.c
+++ b/fs/9p/9p.c
@@ -1,8 +1,9 @@
1/* 1/*
2 * linux/fs/9p/9p.c 2 * linux/fs/9p/9p.c
3 * 3 *
4 * This file contains functions 9P2000 functions 4 * This file contains functions to perform synchronous 9P calls
5 * 5 *
6 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> 8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 * 9 *
@@ -33,6 +34,7 @@
33#include "debug.h" 34#include "debug.h"
34#include "v9fs.h" 35#include "v9fs.h"
35#include "9p.h" 36#include "9p.h"
37#include "conv.h"
36#include "mux.h" 38#include "mux.h"
37 39
38/** 40/**
@@ -46,16 +48,21 @@
46 48
47int 49int
48v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, 50v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
49 char *version, struct v9fs_fcall **fcall) 51 char *version, struct v9fs_fcall **rcp)
50{ 52{
51 struct v9fs_fcall msg; 53 int ret;
54 struct v9fs_fcall *tc;
52 55
53 dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); 56 dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
54 msg.id = TVERSION; 57 tc = v9fs_create_tversion(msize, version);
55 msg.params.tversion.msize = msize;
56 msg.params.tversion.version = version;
57 58
58 return v9fs_mux_rpc(v9ses, &msg, fcall); 59 if (!IS_ERR(tc)) {
60 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
61 kfree(tc);
62 } else
63 ret = PTR_ERR(tc);
64
65 return ret;
59} 66}
60 67
61/** 68/**
@@ -71,19 +78,45 @@ v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
71 78
72int 79int
73v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, 80v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
74 u32 fid, u32 afid, struct v9fs_fcall **fcall) 81 u32 fid, u32 afid, struct v9fs_fcall **rcp)
75{ 82{
76 struct v9fs_fcall msg; 83 int ret;
84 struct v9fs_fcall* tc;
77 85
78 dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, 86 dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
79 aname, fid, afid); 87 aname, fid, afid);
80 msg.id = TATTACH;
81 msg.params.tattach.fid = fid;
82 msg.params.tattach.afid = afid;
83 msg.params.tattach.uname = uname;
84 msg.params.tattach.aname = aname;
85 88
86 return v9fs_mux_rpc(v9ses, &msg, fcall); 89 tc = v9fs_create_tattach(fid, afid, uname, aname);
90 if (!IS_ERR(tc)) {
91 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
92 kfree(tc);
93 } else
94 ret = PTR_ERR(tc);
95
96 return ret;
97}
98
99static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
100 struct v9fs_fcall *rc, int err)
101{
102 int fid;
103 struct v9fs_session_info *v9ses;
104
105 if (err)
106 return;
107
108 fid = tc->params.tclunk.fid;
109 kfree(tc);
110
111 if (!rc)
112 return;
113
114 dprintk(DEBUG_9P, "tcall id %d rcall id %d\n", tc->id, rc->id);
115 v9ses = a;
116 if (rc->id == RCLUNK)
117 v9fs_put_idpool(fid, &v9ses->fidpool);
118
119 kfree(rc);
87} 120}
88 121
89/** 122/**
@@ -95,16 +128,25 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
95 */ 128 */
96 129
97int 130int
98v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, 131v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
99 struct v9fs_fcall **fcall)
100{ 132{
101 struct v9fs_fcall msg; 133 int ret;
134 struct v9fs_fcall *tc, *rc;
102 135
103 dprintk(DEBUG_9P, "fid %d\n", fid); 136 dprintk(DEBUG_9P, "fid %d\n", fid);
104 msg.id = TCLUNK;
105 msg.params.tclunk.fid = fid;
106 137
107 return v9fs_mux_rpc(v9ses, &msg, fcall); 138 rc = NULL;
139 tc = v9fs_create_tclunk(fid);
140 if (!IS_ERR(tc))
141 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
142 else
143 ret = PTR_ERR(tc);
144
145 if (ret)
146 dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
147
148 v9fs_t_clunk_cb(v9ses, tc, rc, ret);
149 return ret;
108} 150}
109 151
110/** 152/**
@@ -114,14 +156,21 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
114 * 156 *
115 */ 157 */
116 158
117int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) 159int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
118{ 160{
119 struct v9fs_fcall msg; 161 int ret;
162 struct v9fs_fcall *tc;
163
164 dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
165
166 tc = v9fs_create_tflush(oldtag);
167 if (!IS_ERR(tc)) {
168 ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
169 kfree(tc);
170 } else
171 ret = PTR_ERR(tc);
120 172
121 dprintk(DEBUG_9P, "oldtag %d\n", tag); 173 return ret;
122 msg.id = TFLUSH;
123 msg.params.tflush.oldtag = tag;
124 return v9fs_mux_rpc(v9ses, &msg, NULL);
125} 174}
126 175
127/** 176/**
@@ -133,17 +182,22 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
133 */ 182 */
134 183
135int 184int
136v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) 185v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
137{ 186{
138 struct v9fs_fcall msg; 187 int ret;
188 struct v9fs_fcall *tc;
139 189
140 dprintk(DEBUG_9P, "fid %d\n", fid); 190 dprintk(DEBUG_9P, "fid %d\n", fid);
141 if (fcall)
142 *fcall = NULL;
143 191
144 msg.id = TSTAT; 192 ret = -ENOMEM;
145 msg.params.tstat.fid = fid; 193 tc = v9fs_create_tstat(fid);
146 return v9fs_mux_rpc(v9ses, &msg, fcall); 194 if (!IS_ERR(tc)) {
195 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
196 kfree(tc);
197 } else
198 ret = PTR_ERR(tc);
199
200 return ret;
147} 201}
148 202
149/** 203/**
@@ -157,16 +211,21 @@ v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
157 211
158int 212int
159v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, 213v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
160 struct v9fs_stat *stat, struct v9fs_fcall **fcall) 214 struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
161{ 215{
162 struct v9fs_fcall msg; 216 int ret;
217 struct v9fs_fcall *tc;
218
219 dprintk(DEBUG_9P, "fid %d\n", fid);
163 220
164 dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length); 221 tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
165 msg.id = TWSTAT; 222 if (!IS_ERR(tc)) {
166 msg.params.twstat.fid = fid; 223 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
167 msg.params.twstat.stat = stat; 224 kfree(tc);
225 } else
226 ret = PTR_ERR(tc);
168 227
169 return v9fs_mux_rpc(v9ses, &msg, fcall); 228 return ret;
170} 229}
171 230
172/** 231/**
@@ -183,23 +242,27 @@ v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
183 242
184int 243int
185v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, 244v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
186 char *name, struct v9fs_fcall **fcall) 245 char *name, struct v9fs_fcall **rcp)
187{ 246{
188 struct v9fs_fcall msg; 247 int ret;
248 struct v9fs_fcall *tc;
249 int nwname;
189 250
190 dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); 251 dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
191 msg.id = TWALK; 252
192 msg.params.twalk.fid = fid; 253 if (name)
193 msg.params.twalk.newfid = newfid; 254 nwname = 1;
194 255 else
195 if (name) { 256 nwname = 0;
196 msg.params.twalk.nwname = 1; 257
197 msg.params.twalk.wnames = &name; 258 tc = v9fs_create_twalk(fid, newfid, nwname, &name);
198 } else { 259 if (!IS_ERR(tc)) {
199 msg.params.twalk.nwname = 0; 260 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
200 } 261 kfree(tc);
201 262 } else
202 return v9fs_mux_rpc(v9ses, &msg, fcall); 263 ret = PTR_ERR(tc);
264
265 return ret;
203} 266}
204 267
205/** 268/**
@@ -214,19 +277,21 @@ v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
214 277
215int 278int
216v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, 279v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
217 struct v9fs_fcall **fcall) 280 struct v9fs_fcall **rcp)
218{ 281{
219 struct v9fs_fcall msg; 282 int ret;
220 long errorno = -1; 283 struct v9fs_fcall *tc;
221 284
222 dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); 285 dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
223 msg.id = TOPEN;
224 msg.params.topen.fid = fid;
225 msg.params.topen.mode = mode;
226 286
227 errorno = v9fs_mux_rpc(v9ses, &msg, fcall); 287 tc = v9fs_create_topen(fid, mode);
288 if (!IS_ERR(tc)) {
289 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
290 kfree(tc);
291 } else
292 ret = PTR_ERR(tc);
228 293
229 return errorno; 294 return ret;
230} 295}
231 296
232/** 297/**
@@ -239,14 +304,21 @@ v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
239 304
240int 305int
241v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, 306v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
242 struct v9fs_fcall **fcall) 307 struct v9fs_fcall **rcp)
243{ 308{
244 struct v9fs_fcall msg; 309 int ret;
310 struct v9fs_fcall *tc;
245 311
246 dprintk(DEBUG_9P, "fid %d\n", fid); 312 dprintk(DEBUG_9P, "fid %d\n", fid);
247 msg.id = TREMOVE; 313
248 msg.params.tremove.fid = fid; 314 tc = v9fs_create_tremove(fid);
249 return v9fs_mux_rpc(v9ses, &msg, fcall); 315 if (!IS_ERR(tc)) {
316 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
317 kfree(tc);
318 } else
319 ret = PTR_ERR(tc);
320
321 return ret;
250} 322}
251 323
252/** 324/**
@@ -262,20 +334,22 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
262 334
263int 335int
264v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, 336v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
265 u32 perm, u8 mode, struct v9fs_fcall **fcall) 337 u32 perm, u8 mode, struct v9fs_fcall **rcp)
266{ 338{
267 struct v9fs_fcall msg; 339 int ret;
340 struct v9fs_fcall *tc;
268 341
269 dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", 342 dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
270 fid, name, perm, mode); 343 fid, name, perm, mode);
271 344
272 msg.id = TCREATE; 345 tc = v9fs_create_tcreate(fid, name, perm, mode);
273 msg.params.tcreate.fid = fid; 346 if (!IS_ERR(tc)) {
274 msg.params.tcreate.name = name; 347 ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
275 msg.params.tcreate.perm = perm; 348 kfree(tc);
276 msg.params.tcreate.mode = mode; 349 } else
350 ret = PTR_ERR(tc);
277 351
278 return v9fs_mux_rpc(v9ses, &msg, fcall); 352 return ret;
279} 353}
280 354
281/** 355/**
@@ -290,31 +364,29 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
290 364
291int 365int
292v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, 366v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
293 u32 count, struct v9fs_fcall **fcall) 367 u32 count, struct v9fs_fcall **rcp)
294{ 368{
295 struct v9fs_fcall msg; 369 int ret;
296 struct v9fs_fcall *rc = NULL; 370 struct v9fs_fcall *tc, *rc;
297 long errorno = -1; 371
298 372 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
299 dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid, 373 (long long unsigned) offset, count);
300 (long unsigned int)offset, count); 374
301 msg.id = TREAD; 375 tc = v9fs_create_tread(fid, offset, count);
302 msg.params.tread.fid = fid; 376 if (!IS_ERR(tc)) {
303 msg.params.tread.offset = offset; 377 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
304 msg.params.tread.count = count; 378 if (!ret)
305 errorno = v9fs_mux_rpc(v9ses, &msg, &rc); 379 ret = rc->params.rread.count;
306 380 if (rcp)
307 if (!errorno) { 381 *rcp = rc;
308 errorno = rc->params.rread.count; 382 else
309 dump_data(rc->params.rread.data, rc->params.rread.count); 383 kfree(rc);
310 } 384
311 385 kfree(tc);
312 if (fcall) 386 } else
313 *fcall = rc; 387 ret = PTR_ERR(tc);
314 else 388
315 kfree(rc); 389 return ret;
316
317 return errorno;
318} 390}
319 391
320/** 392/**
@@ -328,32 +400,30 @@ v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
328 */ 400 */
329 401
330int 402int
331v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, 403v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
332 u64 offset, u32 count, void *data, struct v9fs_fcall **fcall) 404 const char __user *data, struct v9fs_fcall **rcp)
333{ 405{
334 struct v9fs_fcall msg; 406 int ret;
335 struct v9fs_fcall *rc = NULL; 407 struct v9fs_fcall *tc, *rc;
336 long errorno = -1;
337 408
338 dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid, 409 dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
339 (unsigned long long)offset, count); 410 (long long unsigned) offset, count);
340 dump_data(data, count);
341 411
342 msg.id = TWRITE; 412 tc = v9fs_create_twrite(fid, offset, count, data);
343 msg.params.twrite.fid = fid; 413 if (!IS_ERR(tc)) {
344 msg.params.twrite.offset = offset; 414 ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
345 msg.params.twrite.count = count;
346 msg.params.twrite.data = data;
347 415
348 errorno = v9fs_mux_rpc(v9ses, &msg, &rc); 416 if (!ret)
417 ret = rc->params.rwrite.count;
418 if (rcp)
419 *rcp = rc;
420 else
421 kfree(rc);
349 422
350 if (!errorno) 423 kfree(tc);
351 errorno = rc->params.rwrite.count; 424 } else
425 ret = PTR_ERR(tc);
352 426
353 if (fcall) 427 return ret;
354 *fcall = rc;
355 else
356 kfree(rc);
357
358 return errorno;
359} 428}
429
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
index f55424216be2..0cd374d94717 100644
--- a/fs/9p/9p.h
+++ b/fs/9p/9p.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * 9P protocol definitions. 4 * 9P protocol definitions.
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> 8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 * 9 *
@@ -100,9 +101,18 @@ enum {
100 V9FS_QTFILE = 0x00, 101 V9FS_QTFILE = 0x00,
101}; 102};
102 103
104#define V9FS_NOTAG (u16)(~0)
105#define V9FS_NOFID (u32)(~0)
106#define V9FS_MAXWELEM 16
107
103/* ample room for Twrite/Rread header (iounit) */ 108/* ample room for Twrite/Rread header (iounit) */
104#define V9FS_IOHDRSZ 24 109#define V9FS_IOHDRSZ 24
105 110
111struct v9fs_str {
112 u16 len;
113 char *str;
114};
115
106/* qids are the unique ID for a file (like an inode */ 116/* qids are the unique ID for a file (like an inode */
107struct v9fs_qid { 117struct v9fs_qid {
108 u8 type; 118 u8 type;
@@ -120,6 +130,29 @@ struct v9fs_stat {
120 u32 atime; 130 u32 atime;
121 u32 mtime; 131 u32 mtime;
122 u64 length; 132 u64 length;
133 struct v9fs_str name;
134 struct v9fs_str uid;
135 struct v9fs_str gid;
136 struct v9fs_str muid;
137 struct v9fs_str extension; /* 9p2000.u extensions */
138 u32 n_uid; /* 9p2000.u extensions */
139 u32 n_gid; /* 9p2000.u extensions */
140 u32 n_muid; /* 9p2000.u extensions */
141};
142
143/* file metadata (stat) structure used to create Twstat message
144 The is similar to v9fs_stat, but the strings don't point to
145 the same memory block and should be freed separately
146*/
147struct v9fs_wstat {
148 u16 size;
149 u16 type;
150 u32 dev;
151 struct v9fs_qid qid;
152 u32 mode;
153 u32 atime;
154 u32 mtime;
155 u64 length;
123 char *name; 156 char *name;
124 char *uid; 157 char *uid;
125 char *gid; 158 char *gid;
@@ -128,25 +161,24 @@ struct v9fs_stat {
128 u32 n_uid; /* 9p2000.u extensions */ 161 u32 n_uid; /* 9p2000.u extensions */
129 u32 n_gid; /* 9p2000.u extensions */ 162 u32 n_gid; /* 9p2000.u extensions */
130 u32 n_muid; /* 9p2000.u extensions */ 163 u32 n_muid; /* 9p2000.u extensions */
131 char data[0];
132}; 164};
133 165
134/* Structures for Protocol Operations */ 166/* Structures for Protocol Operations */
135 167
136struct Tversion { 168struct Tversion {
137 u32 msize; 169 u32 msize;
138 char *version; 170 struct v9fs_str version;
139}; 171};
140 172
141struct Rversion { 173struct Rversion {
142 u32 msize; 174 u32 msize;
143 char *version; 175 struct v9fs_str version;
144}; 176};
145 177
146struct Tauth { 178struct Tauth {
147 u32 afid; 179 u32 afid;
148 char *uname; 180 struct v9fs_str uname;
149 char *aname; 181 struct v9fs_str aname;
150}; 182};
151 183
152struct Rauth { 184struct Rauth {
@@ -154,12 +186,12 @@ struct Rauth {
154}; 186};
155 187
156struct Rerror { 188struct Rerror {
157 char *error; 189 struct v9fs_str error;
158 u32 errno; /* 9p2000.u extension */ 190 u32 errno; /* 9p2000.u extension */
159}; 191};
160 192
161struct Tflush { 193struct Tflush {
162 u32 oldtag; 194 u16 oldtag;
163}; 195};
164 196
165struct Rflush { 197struct Rflush {
@@ -168,8 +200,8 @@ struct Rflush {
168struct Tattach { 200struct Tattach {
169 u32 fid; 201 u32 fid;
170 u32 afid; 202 u32 afid;
171 char *uname; 203 struct v9fs_str uname;
172 char *aname; 204 struct v9fs_str aname;
173}; 205};
174 206
175struct Rattach { 207struct Rattach {
@@ -179,13 +211,13 @@ struct Rattach {
179struct Twalk { 211struct Twalk {
180 u32 fid; 212 u32 fid;
181 u32 newfid; 213 u32 newfid;
182 u32 nwname; 214 u16 nwname;
183 char **wnames; 215 struct v9fs_str wnames[16];
184}; 216};
185 217
186struct Rwalk { 218struct Rwalk {
187 u32 nwqid; 219 u16 nwqid;
188 struct v9fs_qid *wqids; 220 struct v9fs_qid wqids[16];
189}; 221};
190 222
191struct Topen { 223struct Topen {
@@ -200,7 +232,7 @@ struct Ropen {
200 232
201struct Tcreate { 233struct Tcreate {
202 u32 fid; 234 u32 fid;
203 char *name; 235 struct v9fs_str name;
204 u32 perm; 236 u32 perm;
205 u8 mode; 237 u8 mode;
206}; 238};
@@ -251,12 +283,12 @@ struct Tstat {
251}; 283};
252 284
253struct Rstat { 285struct Rstat {
254 struct v9fs_stat *stat; 286 struct v9fs_stat stat;
255}; 287};
256 288
257struct Twstat { 289struct Twstat {
258 u32 fid; 290 u32 fid;
259 struct v9fs_stat *stat; 291 struct v9fs_stat stat;
260}; 292};
261 293
262struct Rwstat { 294struct Rwstat {
@@ -271,6 +303,7 @@ struct v9fs_fcall {
271 u32 size; 303 u32 size;
272 u8 id; 304 u8 id;
273 u16 tag; 305 u16 tag;
306 void *sdata;
274 307
275 union { 308 union {
276 struct Tversion tversion; 309 struct Tversion tversion;
@@ -303,7 +336,9 @@ struct v9fs_fcall {
303 } params; 336 } params;
304}; 337};
305 338
306#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "") 339#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
340 fcall?fcall->params.rerror.error.len:0, \
341 fcall?fcall->params.rerror.error.str:"");
307 342
308int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, 343int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
309 char *version, struct v9fs_fcall **rcall); 344 char *version, struct v9fs_fcall **rcall);
@@ -311,8 +346,7 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
311int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, 346int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
312 u32 fid, u32 afid, struct v9fs_fcall **rcall); 347 u32 fid, u32 afid, struct v9fs_fcall **rcall);
313 348
314int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, 349int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
315 struct v9fs_fcall **rcall);
316 350
317int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag); 351int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
318 352
@@ -320,7 +354,7 @@ int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
320 struct v9fs_fcall **rcall); 354 struct v9fs_fcall **rcall);
321 355
322int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, 356int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
323 struct v9fs_stat *stat, struct v9fs_fcall **rcall); 357 struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
324 358
325int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, 359int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
326 char *name, struct v9fs_fcall **rcall); 360 char *name, struct v9fs_fcall **rcall);
@@ -338,4 +372,5 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
338 u64 offset, u32 count, struct v9fs_fcall **rcall); 372 u64 offset, u32 count, struct v9fs_fcall **rcall);
339 373
340int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, 374int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
341 u32 count, void *data, struct v9fs_fcall **rcall); 375 u32 count, const char __user * data,
376 struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index e4e4ffe5a7dc..2f4ce43f7b6c 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -1,17 +1,18 @@
1obj-$(CONFIG_9P_FS) := 9p2000.o 1obj-$(CONFIG_9P_FS) := 9p2000.o
2 2
39p2000-objs := \ 39p2000-objs := \
4 trans_fd.o \
5 trans_sock.o \
6 mux.o \
7 9p.o \
8 conv.o \
4 vfs_super.o \ 9 vfs_super.o \
5 vfs_inode.o \ 10 vfs_inode.o \
11 vfs_addr.o \
6 vfs_file.o \ 12 vfs_file.o \
7 vfs_dir.o \ 13 vfs_dir.o \
8 vfs_dentry.o \ 14 vfs_dentry.o \
9 error.o \ 15 error.o \
10 mux.o \
11 trans_fd.o \
12 trans_sock.o \
13 9p.o \
14 conv.o \
15 v9fs.o \ 16 v9fs.o \
16 fid.o 17 fid.o
17 18
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 18121af99d3e..32a9f99154e2 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -30,7 +30,7 @@
30#include <linux/errno.h> 30#include <linux/errno.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/idr.h> 32#include <linux/idr.h>
33 33#include <asm/uaccess.h>
34#include "debug.h" 34#include "debug.h"
35#include "v9fs.h" 35#include "v9fs.h"
36#include "9p.h" 36#include "9p.h"
@@ -56,20 +56,23 @@ static inline int buf_check_overflow(struct cbuf *buf)
56 return buf->p > buf->ep; 56 return buf->p > buf->ep;
57} 57}
58 58
59static inline int buf_check_size(struct cbuf *buf, int len) 59static int buf_check_size(struct cbuf *buf, int len)
60{ 60{
61 if (buf->p+len > buf->ep) { 61 if (buf->p + len > buf->ep) {
62 if (buf->p < buf->ep) { 62 if (buf->p < buf->ep) {
63 eprintk(KERN_ERR, "buffer overflow\n"); 63 eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
64 len, (int)(buf->ep - buf->p));
65 dump_stack();
64 buf->p = buf->ep + 1; 66 buf->p = buf->ep + 1;
65 return 0;
66 } 67 }
68
69 return 0;
67 } 70 }
68 71
69 return 1; 72 return 1;
70} 73}
71 74
72static inline void *buf_alloc(struct cbuf *buf, int len) 75static void *buf_alloc(struct cbuf *buf, int len)
73{ 76{
74 void *ret = NULL; 77 void *ret = NULL;
75 78
@@ -81,7 +84,7 @@ static inline void *buf_alloc(struct cbuf *buf, int len)
81 return ret; 84 return ret;
82} 85}
83 86
84static inline void buf_put_int8(struct cbuf *buf, u8 val) 87static void buf_put_int8(struct cbuf *buf, u8 val)
85{ 88{
86 if (buf_check_size(buf, 1)) { 89 if (buf_check_size(buf, 1)) {
87 buf->p[0] = val; 90 buf->p[0] = val;
@@ -89,7 +92,7 @@ static inline void buf_put_int8(struct cbuf *buf, u8 val)
89 } 92 }
90} 93}
91 94
92static inline void buf_put_int16(struct cbuf *buf, u16 val) 95static void buf_put_int16(struct cbuf *buf, u16 val)
93{ 96{
94 if (buf_check_size(buf, 2)) { 97 if (buf_check_size(buf, 2)) {
95 *(__le16 *) buf->p = cpu_to_le16(val); 98 *(__le16 *) buf->p = cpu_to_le16(val);
@@ -97,7 +100,7 @@ static inline void buf_put_int16(struct cbuf *buf, u16 val)
97 } 100 }
98} 101}
99 102
100static inline void buf_put_int32(struct cbuf *buf, u32 val) 103static void buf_put_int32(struct cbuf *buf, u32 val)
101{ 104{
102 if (buf_check_size(buf, 4)) { 105 if (buf_check_size(buf, 4)) {
103 *(__le32 *)buf->p = cpu_to_le32(val); 106 *(__le32 *)buf->p = cpu_to_le32(val);
@@ -105,7 +108,7 @@ static inline void buf_put_int32(struct cbuf *buf, u32 val)
105 } 108 }
106} 109}
107 110
108static inline void buf_put_int64(struct cbuf *buf, u64 val) 111static void buf_put_int64(struct cbuf *buf, u64 val)
109{ 112{
110 if (buf_check_size(buf, 8)) { 113 if (buf_check_size(buf, 8)) {
111 *(__le64 *)buf->p = cpu_to_le64(val); 114 *(__le64 *)buf->p = cpu_to_le64(val);
@@ -113,7 +116,7 @@ static inline void buf_put_int64(struct cbuf *buf, u64 val)
113 } 116 }
114} 117}
115 118
116static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) 119static void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
117{ 120{
118 if (buf_check_size(buf, slen + 2)) { 121 if (buf_check_size(buf, slen + 2)) {
119 buf_put_int16(buf, slen); 122 buf_put_int16(buf, slen);
@@ -127,15 +130,7 @@ static inline void buf_put_string(struct cbuf *buf, const char *s)
127 buf_put_stringn(buf, s, strlen(s)); 130 buf_put_stringn(buf, s, strlen(s));
128} 131}
129 132
130static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen) 133static u8 buf_get_int8(struct cbuf *buf)
131{
132 if (buf_check_size(buf, datalen)) {
133 memcpy(buf->p, data, datalen);
134 buf->p += datalen;
135 }
136}
137
138static inline u8 buf_get_int8(struct cbuf *buf)
139{ 134{
140 u8 ret = 0; 135 u8 ret = 0;
141 136
@@ -147,7 +142,7 @@ static inline u8 buf_get_int8(struct cbuf *buf)
147 return ret; 142 return ret;
148} 143}
149 144
150static inline u16 buf_get_int16(struct cbuf *buf) 145static u16 buf_get_int16(struct cbuf *buf)
151{ 146{
152 u16 ret = 0; 147 u16 ret = 0;
153 148
@@ -159,7 +154,7 @@ static inline u16 buf_get_int16(struct cbuf *buf)
159 return ret; 154 return ret;
160} 155}
161 156
162static inline u32 buf_get_int32(struct cbuf *buf) 157static u32 buf_get_int32(struct cbuf *buf)
163{ 158{
164 u32 ret = 0; 159 u32 ret = 0;
165 160
@@ -171,7 +166,7 @@ static inline u32 buf_get_int32(struct cbuf *buf)
171 return ret; 166 return ret;
172} 167}
173 168
174static inline u64 buf_get_int64(struct cbuf *buf) 169static u64 buf_get_int64(struct cbuf *buf)
175{ 170{
176 u64 ret = 0; 171 u64 ret = 0;
177 172
@@ -183,86 +178,37 @@ static inline u64 buf_get_int64(struct cbuf *buf)
183 return ret; 178 return ret;
184} 179}
185 180
186static inline int 181static void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
187buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
188{
189 u16 len = 0;
190
191 len = buf_get_int16(buf);
192 if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
193 memcpy(data, buf->p, len);
194 data[len] = 0;
195 buf->p += len;
196 len++;
197 }
198
199 return len;
200}
201
202static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
203{
204 char *ret;
205 u16 len;
206
207 ret = NULL;
208 len = buf_get_int16(buf);
209
210 if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
211 buf_check_size(sbuf, len+1)) {
212
213 memcpy(sbuf->p, buf->p, len);
214 sbuf->p[len] = 0;
215 ret = sbuf->p;
216 buf->p += len;
217 sbuf->p += len + 1;
218 }
219
220 return ret;
221}
222
223static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
224{ 182{
225 int ret = 0; 183 vstr->len = buf_get_int16(buf);
226 184 if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
227 if (buf_check_size(buf, datalen)) { 185 vstr->str = buf->p;
228 memcpy(data, buf->p, datalen); 186 buf->p += vstr->len;
229 buf->p += datalen; 187 } else {
230 ret = datalen; 188 vstr->len = 0;
189 vstr->str = NULL;
231 } 190 }
232
233 return ret;
234} 191}
235 192
236static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, 193static void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
237 int datalen)
238{ 194{
239 char *ret = NULL; 195 qid->type = buf_get_int8(bufp);
240 int n = 0; 196 qid->version = buf_get_int32(bufp);
241 197 qid->path = buf_get_int64(bufp);
242 if (buf_check_size(dbuf, datalen)) {
243 n = buf_get_data(buf, dbuf->p, datalen);
244 if (n > 0) {
245 ret = dbuf->p;
246 dbuf->p += n;
247 }
248 }
249
250 return ret;
251} 198}
252 199
253/** 200/**
254 * v9fs_size_stat - calculate the size of a variable length stat struct 201 * v9fs_size_wstat - calculate the size of a variable length stat struct
255 * @v9ses: session information
256 * @stat: metadata (stat) structure 202 * @stat: metadata (stat) structure
203 * @extended: non-zero if 9P2000.u
257 * 204 *
258 */ 205 */
259 206
260static int v9fs_size_stat(struct v9fs_session_info *v9ses, 207static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
261 struct v9fs_stat *stat)
262{ 208{
263 int size = 0; 209 int size = 0;
264 210
265 if (stat == NULL) { 211 if (wstat == NULL) {
266 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); 212 eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
267 return 0; 213 return 0;
268 } 214 }
@@ -279,82 +225,38 @@ static int v9fs_size_stat(struct v9fs_session_info *v9ses,
279 8 + /* length[8] */ 225 8 + /* length[8] */
280 8; /* minimum sum of string lengths */ 226 8; /* minimum sum of string lengths */
281 227
282 if (stat->name) 228 if (wstat->name)
283 size += strlen(stat->name); 229 size += strlen(wstat->name);
284 if (stat->uid) 230 if (wstat->uid)
285 size += strlen(stat->uid); 231 size += strlen(wstat->uid);
286 if (stat->gid) 232 if (wstat->gid)
287 size += strlen(stat->gid); 233 size += strlen(wstat->gid);
288 if (stat->muid) 234 if (wstat->muid)
289 size += strlen(stat->muid); 235 size += strlen(wstat->muid);
290 236
291 if (v9ses->extended) { 237 if (extended) {
292 size += 4 + /* n_uid[4] */ 238 size += 4 + /* n_uid[4] */
293 4 + /* n_gid[4] */ 239 4 + /* n_gid[4] */
294 4 + /* n_muid[4] */ 240 4 + /* n_muid[4] */
295 2; /* string length of extension[4] */ 241 2; /* string length of extension[4] */
296 if (stat->extension) 242 if (wstat->extension)
297 size += strlen(stat->extension); 243 size += strlen(wstat->extension);
298 } 244 }
299 245
300 return size; 246 return size;
301} 247}
302 248
303/** 249/**
304 * serialize_stat - safely format a stat structure for transmission 250 * buf_get_stat - safely decode a recieved metadata (stat) structure
305 * @v9ses: session info
306 * @stat: metadata (stat) structure
307 * @bufp: buffer to serialize structure into
308 *
309 */
310
311static int
312serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
313 struct cbuf *bufp)
314{
315 buf_put_int16(bufp, stat->size);
316 buf_put_int16(bufp, stat->type);
317 buf_put_int32(bufp, stat->dev);
318 buf_put_int8(bufp, stat->qid.type);
319 buf_put_int32(bufp, stat->qid.version);
320 buf_put_int64(bufp, stat->qid.path);
321 buf_put_int32(bufp, stat->mode);
322 buf_put_int32(bufp, stat->atime);
323 buf_put_int32(bufp, stat->mtime);
324 buf_put_int64(bufp, stat->length);
325
326 buf_put_string(bufp, stat->name);
327 buf_put_string(bufp, stat->uid);
328 buf_put_string(bufp, stat->gid);
329 buf_put_string(bufp, stat->muid);
330
331 if (v9ses->extended) {
332 buf_put_string(bufp, stat->extension);
333 buf_put_int32(bufp, stat->n_uid);
334 buf_put_int32(bufp, stat->n_gid);
335 buf_put_int32(bufp, stat->n_muid);
336 }
337
338 if (buf_check_overflow(bufp))
339 return 0;
340
341 return stat->size;
342}
343
344/**
345 * deserialize_stat - safely decode a recieved metadata (stat) structure
346 * @v9ses: session info
347 * @bufp: buffer to deserialize 251 * @bufp: buffer to deserialize
348 * @stat: metadata (stat) structure 252 * @stat: metadata (stat) structure
349 * @dbufp: buffer to deserialize variable strings into 253 * @extended: non-zero if 9P2000.u
350 * 254 *
351 */ 255 */
352 256
353static inline int 257static void
354deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, 258buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
355 struct v9fs_stat *stat, struct cbuf *dbufp)
356{ 259{
357
358 stat->size = buf_get_int16(bufp); 260 stat->size = buf_get_int16(bufp);
359 stat->type = buf_get_int16(bufp); 261 stat->type = buf_get_int16(bufp);
360 stat->dev = buf_get_int32(bufp); 262 stat->dev = buf_get_int32(bufp);
@@ -365,282 +267,82 @@ deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
365 stat->atime = buf_get_int32(bufp); 267 stat->atime = buf_get_int32(bufp);
366 stat->mtime = buf_get_int32(bufp); 268 stat->mtime = buf_get_int32(bufp);
367 stat->length = buf_get_int64(bufp); 269 stat->length = buf_get_int64(bufp);
368 stat->name = buf_get_stringb(bufp, dbufp); 270 buf_get_str(bufp, &stat->name);
369 stat->uid = buf_get_stringb(bufp, dbufp); 271 buf_get_str(bufp, &stat->uid);
370 stat->gid = buf_get_stringb(bufp, dbufp); 272 buf_get_str(bufp, &stat->gid);
371 stat->muid = buf_get_stringb(bufp, dbufp); 273 buf_get_str(bufp, &stat->muid);
372 274
373 if (v9ses->extended) { 275 if (extended) {
374 stat->extension = buf_get_stringb(bufp, dbufp); 276 buf_get_str(bufp, &stat->extension);
375 stat->n_uid = buf_get_int32(bufp); 277 stat->n_uid = buf_get_int32(bufp);
376 stat->n_gid = buf_get_int32(bufp); 278 stat->n_gid = buf_get_int32(bufp);
377 stat->n_muid = buf_get_int32(bufp); 279 stat->n_muid = buf_get_int32(bufp);
378 } 280 }
379
380 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
381 return 0;
382
383 return stat->size + 2;
384}
385
386/**
387 * deserialize_statb - wrapper for decoding a received metadata structure
388 * @v9ses: session info
389 * @bufp: buffer to deserialize
390 * @dbufp: buffer to deserialize variable strings into
391 *
392 */
393
394static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
395 *v9ses, struct cbuf *bufp,
396 struct cbuf *dbufp)
397{
398 struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
399
400 if (ret) {
401 int n = deserialize_stat(v9ses, bufp, ret, dbufp);
402 if (n <= 0)
403 return NULL;
404 }
405
406 return ret;
407} 281}
408 282
409/** 283/**
410 * v9fs_deserialize_stat - decode a received metadata structure 284 * v9fs_deserialize_stat - decode a received metadata structure
411 * @v9ses: session info
412 * @buf: buffer to deserialize 285 * @buf: buffer to deserialize
413 * @buflen: length of received buffer 286 * @buflen: length of received buffer
414 * @stat: metadata structure to decode into 287 * @stat: metadata structure to decode into
415 * @statlen: length of destination metadata structure 288 * @extended: non-zero if 9P2000.u
416 * 289 *
290 * Note: stat will point to the buf region.
417 */ 291 */
418 292
419int 293int
420v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf, 294v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
421 u32 buflen, struct v9fs_stat *stat, u32 statlen) 295 int extended)
422{ 296{
423 struct cbuf buffer; 297 struct cbuf buffer;
424 struct cbuf *bufp = &buffer; 298 struct cbuf *bufp = &buffer;
425 struct cbuf dbuffer; 299 unsigned char *p;
426 struct cbuf *dbufp = &dbuffer;
427 300
428 buf_init(bufp, buf, buflen); 301 buf_init(bufp, buf, buflen);
429 buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat), 302 p = bufp->p;
430 statlen - sizeof(struct v9fs_stat)); 303 buf_get_stat(bufp, stat, extended);
431
432 return deserialize_stat(v9ses, bufp, stat, dbufp);
433}
434
435static inline int
436v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
437{
438 int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */
439 int i = 0;
440
441 switch (fcall->id) {
442 default:
443 eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
444 return 0;
445 case TVERSION: /* msize[4] version[s] */
446 size += 4 + 2 + strlen(fcall->params.tversion.version);
447 break;
448 case TAUTH: /* afid[4] uname[s] aname[s] */
449 size += 4 + 2 + strlen(fcall->params.tauth.uname) +
450 2 + strlen(fcall->params.tauth.aname);
451 break;
452 case TFLUSH: /* oldtag[2] */
453 size += 2;
454 break;
455 case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */
456 size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
457 2 + strlen(fcall->params.tattach.aname);
458 break;
459 case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
460 size += 4 + 4 + 2;
461 /* now compute total for the array of names */
462 for (i = 0; i < fcall->params.twalk.nwname; i++)
463 size += 2 + strlen(fcall->params.twalk.wnames[i]);
464 break;
465 case TOPEN: /* fid[4] mode[1] */
466 size += 4 + 1;
467 break;
468 case TCREATE: /* fid[4] name[s] perm[4] mode[1] */
469 size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
470 break;
471 case TREAD: /* fid[4] offset[8] count[4] */
472 size += 4 + 8 + 4;
473 break;
474 case TWRITE: /* fid[4] offset[8] count[4] data[count] */
475 size += 4 + 8 + 4 + fcall->params.twrite.count;
476 break;
477 case TCLUNK: /* fid[4] */
478 size += 4;
479 break;
480 case TREMOVE: /* fid[4] */
481 size += 4;
482 break;
483 case TSTAT: /* fid[4] */
484 size += 4;
485 break;
486 case TWSTAT: /* fid[4] stat[n] */
487 fcall->params.twstat.stat->size =
488 v9fs_size_stat(v9ses, fcall->params.twstat.stat);
489 size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
490 }
491 return size;
492}
493
494/*
495 * v9fs_serialize_fcall - marshall fcall struct into a packet
496 * @v9ses: session information
497 * @fcall: structure to convert
498 * @data: buffer to serialize fcall into
499 * @datalen: length of buffer to serialize fcall into
500 *
501 */
502
503int
504v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
505 void *data, u32 datalen)
506{
507 int i = 0;
508 struct v9fs_stat *stat = NULL;
509 struct cbuf buffer;
510 struct cbuf *bufp = &buffer;
511
512 buf_init(bufp, data, datalen);
513
514 if (!fcall) {
515 eprintk(KERN_ERR, "no fcall\n");
516 return -EINVAL;
517 }
518
519 fcall->size = v9fs_size_fcall(v9ses, fcall);
520
521 buf_put_int32(bufp, fcall->size);
522 buf_put_int8(bufp, fcall->id);
523 buf_put_int16(bufp, fcall->tag);
524
525 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
526 fcall->tag);
527
528 /* now encode it */
529 switch (fcall->id) {
530 default:
531 eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
532 return -EPROTO;
533 case TVERSION:
534 buf_put_int32(bufp, fcall->params.tversion.msize);
535 buf_put_string(bufp, fcall->params.tversion.version);
536 break;
537 case TAUTH:
538 buf_put_int32(bufp, fcall->params.tauth.afid);
539 buf_put_string(bufp, fcall->params.tauth.uname);
540 buf_put_string(bufp, fcall->params.tauth.aname);
541 break;
542 case TFLUSH:
543 buf_put_int16(bufp, fcall->params.tflush.oldtag);
544 break;
545 case TATTACH:
546 buf_put_int32(bufp, fcall->params.tattach.fid);
547 buf_put_int32(bufp, fcall->params.tattach.afid);
548 buf_put_string(bufp, fcall->params.tattach.uname);
549 buf_put_string(bufp, fcall->params.tattach.aname);
550 break;
551 case TWALK:
552 buf_put_int32(bufp, fcall->params.twalk.fid);
553 buf_put_int32(bufp, fcall->params.twalk.newfid);
554 buf_put_int16(bufp, fcall->params.twalk.nwname);
555 for (i = 0; i < fcall->params.twalk.nwname; i++)
556 buf_put_string(bufp, fcall->params.twalk.wnames[i]);
557 break;
558 case TOPEN:
559 buf_put_int32(bufp, fcall->params.topen.fid);
560 buf_put_int8(bufp, fcall->params.topen.mode);
561 break;
562 case TCREATE:
563 buf_put_int32(bufp, fcall->params.tcreate.fid);
564 buf_put_string(bufp, fcall->params.tcreate.name);
565 buf_put_int32(bufp, fcall->params.tcreate.perm);
566 buf_put_int8(bufp, fcall->params.tcreate.mode);
567 break;
568 case TREAD:
569 buf_put_int32(bufp, fcall->params.tread.fid);
570 buf_put_int64(bufp, fcall->params.tread.offset);
571 buf_put_int32(bufp, fcall->params.tread.count);
572 break;
573 case TWRITE:
574 buf_put_int32(bufp, fcall->params.twrite.fid);
575 buf_put_int64(bufp, fcall->params.twrite.offset);
576 buf_put_int32(bufp, fcall->params.twrite.count);
577 buf_put_data(bufp, fcall->params.twrite.data,
578 fcall->params.twrite.count);
579 break;
580 case TCLUNK:
581 buf_put_int32(bufp, fcall->params.tclunk.fid);
582 break;
583 case TREMOVE:
584 buf_put_int32(bufp, fcall->params.tremove.fid);
585 break;
586 case TSTAT:
587 buf_put_int32(bufp, fcall->params.tstat.fid);
588 break;
589 case TWSTAT:
590 buf_put_int32(bufp, fcall->params.twstat.fid);
591 stat = fcall->params.twstat.stat;
592
593 buf_put_int16(bufp, stat->size + 2);
594 serialize_stat(v9ses, stat, bufp);
595 break;
596 }
597 304
598 if (buf_check_overflow(bufp)) 305 if (buf_check_overflow(bufp))
599 return -EIO; 306 return 0;
600 307 else
601 return fcall->size; 308 return bufp->p - p;
602} 309}
603 310
604/** 311/**
605 * deserialize_fcall - unmarshal a response 312 * deserialize_fcall - unmarshal a response
606 * @v9ses: session information
607 * @msgsize: size of rcall message
608 * @buf: recieved buffer 313 * @buf: recieved buffer
609 * @buflen: length of received buffer 314 * @buflen: length of received buffer
610 * @rcall: fcall structure to populate 315 * @rcall: fcall structure to populate
611 * @rcalllen: length of fcall structure to populate 316 * @rcalllen: length of fcall structure to populate
317 * @extended: non-zero if 9P2000.u
612 * 318 *
613 */ 319 */
614 320
615int 321int
616v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, 322v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
617 void *buf, u32 buflen, struct v9fs_fcall *rcall, 323 int extended)
618 int rcalllen)
619{ 324{
620 325
621 struct cbuf buffer; 326 struct cbuf buffer;
622 struct cbuf *bufp = &buffer; 327 struct cbuf *bufp = &buffer;
623 struct cbuf dbuffer;
624 struct cbuf *dbufp = &dbuffer;
625 int i = 0; 328 int i = 0;
626 329
627 buf_init(bufp, buf, buflen); 330 buf_init(bufp, buf, buflen);
628 buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
629 rcalllen - sizeof(struct v9fs_fcall));
630 331
631 rcall->size = msgsize; 332 rcall->size = buf_get_int32(bufp);
632 rcall->id = buf_get_int8(bufp); 333 rcall->id = buf_get_int8(bufp);
633 rcall->tag = buf_get_int16(bufp); 334 rcall->tag = buf_get_int16(bufp);
634 335
635 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, 336 dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
636 rcall->tag); 337 rcall->tag);
338
637 switch (rcall->id) { 339 switch (rcall->id) {
638 default: 340 default:
639 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); 341 eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
640 return -EPROTO; 342 return -EPROTO;
641 case RVERSION: 343 case RVERSION:
642 rcall->params.rversion.msize = buf_get_int32(bufp); 344 rcall->params.rversion.msize = buf_get_int32(bufp);
643 rcall->params.rversion.version = buf_get_stringb(bufp, dbufp); 345 buf_get_str(bufp, &rcall->params.rversion.version);
644 break; 346 break;
645 case RFLUSH: 347 case RFLUSH:
646 break; 348 break;
@@ -651,34 +353,27 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
651 break; 353 break;
652 case RWALK: 354 case RWALK:
653 rcall->params.rwalk.nwqid = buf_get_int16(bufp); 355 rcall->params.rwalk.nwqid = buf_get_int16(bufp);
654 rcall->params.rwalk.wqids = buf_alloc(dbufp, 356 if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
655 rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); 357 eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
656 if (rcall->params.rwalk.wqids) 358 V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
657 for (i = 0; i < rcall->params.rwalk.nwqid; i++) { 359 return -EPROTO;
658 rcall->params.rwalk.wqids[i].type = 360 }
659 buf_get_int8(bufp); 361
660 rcall->params.rwalk.wqids[i].version = 362 for (i = 0; i < rcall->params.rwalk.nwqid; i++)
661 buf_get_int16(bufp); 363 buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
662 rcall->params.rwalk.wqids[i].path =
663 buf_get_int64(bufp);
664 }
665 break; 364 break;
666 case ROPEN: 365 case ROPEN:
667 rcall->params.ropen.qid.type = buf_get_int8(bufp); 366 buf_get_qid(bufp, &rcall->params.ropen.qid);
668 rcall->params.ropen.qid.version = buf_get_int32(bufp);
669 rcall->params.ropen.qid.path = buf_get_int64(bufp);
670 rcall->params.ropen.iounit = buf_get_int32(bufp); 367 rcall->params.ropen.iounit = buf_get_int32(bufp);
671 break; 368 break;
672 case RCREATE: 369 case RCREATE:
673 rcall->params.rcreate.qid.type = buf_get_int8(bufp); 370 buf_get_qid(bufp, &rcall->params.rcreate.qid);
674 rcall->params.rcreate.qid.version = buf_get_int32(bufp);
675 rcall->params.rcreate.qid.path = buf_get_int64(bufp);
676 rcall->params.rcreate.iounit = buf_get_int32(bufp); 371 rcall->params.rcreate.iounit = buf_get_int32(bufp);
677 break; 372 break;
678 case RREAD: 373 case RREAD:
679 rcall->params.rread.count = buf_get_int32(bufp); 374 rcall->params.rread.count = buf_get_int32(bufp);
680 rcall->params.rread.data = buf_get_datab(bufp, dbufp, 375 rcall->params.rread.data = bufp->p;
681 rcall->params.rread.count); 376 buf_check_size(bufp, rcall->params.rread.count);
682 break; 377 break;
683 case RWRITE: 378 case RWRITE:
684 rcall->params.rwrite.count = buf_get_int32(bufp); 379 rcall->params.rwrite.count = buf_get_int32(bufp);
@@ -689,20 +384,443 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
689 break; 384 break;
690 case RSTAT: 385 case RSTAT:
691 buf_get_int16(bufp); 386 buf_get_int16(bufp);
692 rcall->params.rstat.stat = 387 buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
693 deserialize_statb(v9ses, bufp, dbufp);
694 break; 388 break;
695 case RWSTAT: 389 case RWSTAT:
696 break; 390 break;
697 case RERROR: 391 case RERROR:
698 rcall->params.rerror.error = buf_get_stringb(bufp, dbufp); 392 buf_get_str(bufp, &rcall->params.rerror.error);
699 if (v9ses->extended) 393 if (extended)
700 rcall->params.rerror.errno = buf_get_int16(bufp); 394 rcall->params.rerror.errno = buf_get_int16(bufp);
701 break; 395 break;
702 } 396 }
703 397
704 if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) 398 if (buf_check_overflow(bufp)) {
399 dprintk(DEBUG_ERROR, "buffer overflow\n");
705 return -EIO; 400 return -EIO;
401 }
402
403 return bufp->p - bufp->sp;
404}
405
406static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
407{
408 *p = val;
409 buf_put_int8(bufp, val);
410}
411
412static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
413{
414 *p = val;
415 buf_put_int16(bufp, val);
416}
417
418static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
419{
420 *p = val;
421 buf_put_int32(bufp, val);
422}
423
424static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
425{
426 *p = val;
427 buf_put_int64(bufp, val);
428}
429
430static void
431v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
432{
433 if (data) {
434 str->len = strlen(data);
435 str->str = bufp->p;
436 } else {
437 str->len = 0;
438 str->str = NULL;
439 }
440
441 buf_put_stringn(bufp, data, str->len);
442}
443
444static int
445v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
446 unsigned char **pdata)
447{
448 *pdata = buf_alloc(bufp, count);
449 return copy_from_user(*pdata, data, count);
450}
451
452static void
453v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
454 struct v9fs_stat *stat, int statsz, int extended)
455{
456 v9fs_put_int16(bufp, statsz, &stat->size);
457 v9fs_put_int16(bufp, wstat->type, &stat->type);
458 v9fs_put_int32(bufp, wstat->dev, &stat->dev);
459 v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
460 v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
461 v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
462 v9fs_put_int32(bufp, wstat->mode, &stat->mode);
463 v9fs_put_int32(bufp, wstat->atime, &stat->atime);
464 v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
465 v9fs_put_int64(bufp, wstat->length, &stat->length);
466
467 v9fs_put_str(bufp, wstat->name, &stat->name);
468 v9fs_put_str(bufp, wstat->uid, &stat->uid);
469 v9fs_put_str(bufp, wstat->gid, &stat->gid);
470 v9fs_put_str(bufp, wstat->muid, &stat->muid);
471
472 if (extended) {
473 v9fs_put_str(bufp, wstat->extension, &stat->extension);
474 v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
475 v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
476 v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
477 }
478}
479
480static struct v9fs_fcall *
481v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
482{
483 struct v9fs_fcall *fc;
484
485 size += 4 + 1 + 2; /* size[4] id[1] tag[2] */
486 fc = kmalloc(sizeof(struct v9fs_fcall) + size, GFP_KERNEL);
487 if (!fc)
488 return ERR_PTR(-ENOMEM);
489
490 fc->sdata = (char *)fc + sizeof(*fc);
491
492 buf_init(bufp, (char *)fc->sdata, size);
493 v9fs_put_int32(bufp, size, &fc->size);
494 v9fs_put_int8(bufp, id, &fc->id);
495 v9fs_put_int16(bufp, V9FS_NOTAG, &fc->tag);
496
497 return fc;
498}
499
500void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
501{
502 fc->tag = tag;
503 *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag);
504}
505
506struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
507{
508 int size;
509 struct v9fs_fcall *fc;
510 struct cbuf buffer;
511 struct cbuf *bufp = &buffer;
512
513 size = 4 + 2 + strlen(version); /* msize[4] version[s] */
514 fc = v9fs_create_common(bufp, size, TVERSION);
515 if (IS_ERR(fc))
516 goto error;
517
518 v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
519 v9fs_put_str(bufp, version, &fc->params.tversion.version);
520
521 if (buf_check_overflow(bufp)) {
522 kfree(fc);
523 fc = ERR_PTR(-ENOMEM);
524 }
525 error:
526 return fc;
527}
528
529struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
530{
531 int size;
532 struct v9fs_fcall *fc;
533 struct cbuf buffer;
534 struct cbuf *bufp = &buffer;
535
536 size = 4 + 2 + strlen(uname) + 2 + strlen(aname); /* afid[4] uname[s] aname[s] */
537 fc = v9fs_create_common(bufp, size, TAUTH);
538 if (IS_ERR(fc))
539 goto error;
540
541 v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
542 v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
543 v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
544
545 if (buf_check_overflow(bufp)) {
546 kfree(fc);
547 fc = ERR_PTR(-ENOMEM);
548 }
549 error:
550 return fc;
551}
552
553struct v9fs_fcall *
554v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
555{
556 int size;
557 struct v9fs_fcall *fc;
558 struct cbuf buffer;
559 struct cbuf *bufp = &buffer;
560
561 size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); /* fid[4] afid[4] uname[s] aname[s] */
562 fc = v9fs_create_common(bufp, size, TATTACH);
563 if (IS_ERR(fc))
564 goto error;
565
566 v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
567 v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
568 v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
569 v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
570
571 error:
572 return fc;
573}
574
575struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
576{
577 int size;
578 struct v9fs_fcall *fc;
579 struct cbuf buffer;
580 struct cbuf *bufp = &buffer;
581
582 size = 2; /* oldtag[2] */
583 fc = v9fs_create_common(bufp, size, TFLUSH);
584 if (IS_ERR(fc))
585 goto error;
586
587 v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
588
589 if (buf_check_overflow(bufp)) {
590 kfree(fc);
591 fc = ERR_PTR(-ENOMEM);
592 }
593 error:
594 return fc;
595}
596
597struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
598 char **wnames)
599{
600 int i, size;
601 struct v9fs_fcall *fc;
602 struct cbuf buffer;
603 struct cbuf *bufp = &buffer;
604
605 if (nwname > V9FS_MAXWELEM) {
606 dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
607 return NULL;
608 }
609
610 size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */
611 for (i = 0; i < nwname; i++) {
612 size += 2 + strlen(wnames[i]); /* wname[s] */
613 }
614
615 fc = v9fs_create_common(bufp, size, TWALK);
616 if (IS_ERR(fc))
617 goto error;
618
619 v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
620 v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
621 v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
622 for (i = 0; i < nwname; i++) {
623 v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
624 }
625
626 if (buf_check_overflow(bufp)) {
627 kfree(fc);
628 fc = ERR_PTR(-ENOMEM);
629 }
630 error:
631 return fc;
632}
706 633
707 return rcall->size; 634struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
635{
636 int size;
637 struct v9fs_fcall *fc;
638 struct cbuf buffer;
639 struct cbuf *bufp = &buffer;
640
641 size = 4 + 1; /* fid[4] mode[1] */
642 fc = v9fs_create_common(bufp, size, TOPEN);
643 if (IS_ERR(fc))
644 goto error;
645
646 v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
647 v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
648
649 if (buf_check_overflow(bufp)) {
650 kfree(fc);
651 fc = ERR_PTR(-ENOMEM);
652 }
653 error:
654 return fc;
655}
656
657struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
658{
659 int size;
660 struct v9fs_fcall *fc;
661 struct cbuf buffer;
662 struct cbuf *bufp = &buffer;
663
664 size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */
665 fc = v9fs_create_common(bufp, size, TCREATE);
666 if (IS_ERR(fc))
667 goto error;
668
669 v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
670 v9fs_put_str(bufp, name, &fc->params.tcreate.name);
671 v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
672 v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
673
674 if (buf_check_overflow(bufp)) {
675 kfree(fc);
676 fc = ERR_PTR(-ENOMEM);
677 }
678 error:
679 return fc;
680}
681
682struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
683{
684 int size;
685 struct v9fs_fcall *fc;
686 struct cbuf buffer;
687 struct cbuf *bufp = &buffer;
688
689 size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */
690 fc = v9fs_create_common(bufp, size, TREAD);
691 if (IS_ERR(fc))
692 goto error;
693
694 v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
695 v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
696 v9fs_put_int32(bufp, count, &fc->params.tread.count);
697
698 if (buf_check_overflow(bufp)) {
699 kfree(fc);
700 fc = ERR_PTR(-ENOMEM);
701 }
702 error:
703 return fc;
704}
705
706struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
707 const char __user * data)
708{
709 int size, err;
710 struct v9fs_fcall *fc;
711 struct cbuf buffer;
712 struct cbuf *bufp = &buffer;
713
714 size = 4 + 8 + 4 + count; /* fid[4] offset[8] count[4] data[count] */
715 fc = v9fs_create_common(bufp, size, TWRITE);
716 if (IS_ERR(fc))
717 goto error;
718
719 v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
720 v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
721 v9fs_put_int32(bufp, count, &fc->params.twrite.count);
722 err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
723 if (err) {
724 kfree(fc);
725 fc = ERR_PTR(err);
726 }
727
728 if (buf_check_overflow(bufp)) {
729 kfree(fc);
730 fc = ERR_PTR(-ENOMEM);
731 }
732 error:
733 return fc;
734}
735
736struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
737{
738 int size;
739 struct v9fs_fcall *fc;
740 struct cbuf buffer;
741 struct cbuf *bufp = &buffer;
742
743 size = 4; /* fid[4] */
744 fc = v9fs_create_common(bufp, size, TCLUNK);
745 if (IS_ERR(fc))
746 goto error;
747
748 v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
749
750 if (buf_check_overflow(bufp)) {
751 kfree(fc);
752 fc = ERR_PTR(-ENOMEM);
753 }
754 error:
755 return fc;
756}
757
758struct v9fs_fcall *v9fs_create_tremove(u32 fid)
759{
760 int size;
761 struct v9fs_fcall *fc;
762 struct cbuf buffer;
763 struct cbuf *bufp = &buffer;
764
765 size = 4; /* fid[4] */
766 fc = v9fs_create_common(bufp, size, TREMOVE);
767 if (IS_ERR(fc))
768 goto error;
769
770 v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
771
772 if (buf_check_overflow(bufp)) {
773 kfree(fc);
774 fc = ERR_PTR(-ENOMEM);
775 }
776 error:
777 return fc;
778}
779
780struct v9fs_fcall *v9fs_create_tstat(u32 fid)
781{
782 int size;
783 struct v9fs_fcall *fc;
784 struct cbuf buffer;
785 struct cbuf *bufp = &buffer;
786
787 size = 4; /* fid[4] */
788 fc = v9fs_create_common(bufp, size, TSTAT);
789 if (IS_ERR(fc))
790 goto error;
791
792 v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
793
794 if (buf_check_overflow(bufp)) {
795 kfree(fc);
796 fc = ERR_PTR(-ENOMEM);
797 }
798 error:
799 return fc;
800}
801
802struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
803 int extended)
804{
805 int size, statsz;
806 struct v9fs_fcall *fc;
807 struct cbuf buffer;
808 struct cbuf *bufp = &buffer;
809
810 statsz = v9fs_size_wstat(wstat, extended);
811 size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */
812 fc = v9fs_create_common(bufp, size, TWSTAT);
813 if (IS_ERR(fc))
814 goto error;
815
816 v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
817 buf_put_int16(bufp, statsz + 2);
818 v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);
819
820 if (buf_check_overflow(bufp)) {
821 kfree(fc);
822 fc = ERR_PTR(-ENOMEM);
823 }
824 error:
825 return fc;
708} 826}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
index ee849613c61a..26a736e4a2e7 100644
--- a/fs/9p/conv.h
+++ b/fs/9p/conv.h
@@ -1,8 +1,9 @@
1/* 1/*
2 * linux/fs/9p/conv.h 2 * linux/fs/9p/conv.h
3 * 3 *
4 * 9P protocol conversion definitions 4 * 9P protocol conversion definitions.
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> 8 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 * 9 *
@@ -24,13 +25,27 @@
24 * 25 *
25 */ 26 */
26 27
27int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf, 28int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
28 u32 buflen, struct v9fs_stat *stat, u32 statlen); 29 int extended);
29int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall, 30int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
30 void *buf, u32 buflen); 31 int extended);
31int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
32 void *buf, u32 buflen, struct v9fs_fcall *rcall,
33 int rcalllen);
34 32
35/* this one is actually in error.c right now */ 33void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
36int v9fs_errstr2errno(char *errstr); 34
35struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
36struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname);
37struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
38 char *aname);
39struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
40struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
41 char **wnames);
42struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
43struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode);
44struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
45struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
46 const char __user *data);
47struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
48struct v9fs_fcall *v9fs_create_tremove(u32 fid);
49struct v9fs_fcall *v9fs_create_tstat(u32 fid);
50struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
51 int extended);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
index 4445f06919d9..fe551032788b 100644
--- a/fs/9p/debug.h
+++ b/fs/9p/debug.h
@@ -51,16 +51,23 @@ do { \
51#if DEBUG_DUMP_PKT 51#if DEBUG_DUMP_PKT
52static inline void dump_data(const unsigned char *data, unsigned int datalen) 52static inline void dump_data(const unsigned char *data, unsigned int datalen)
53{ 53{
54 int i, j; 54 int i, n;
55 int len = datalen; 55 char buf[5*8];
56 56
57 printk(KERN_DEBUG "data "); 57 n = 0;
58 for (i = 0; i < len; i += 4) { 58 i = 0;
59 for (j = 0; (j < 4) && (i + j < len); j++) 59 while (i < datalen) {
60 printk(KERN_DEBUG "%02x", data[i + j]); 60 n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
61 printk(KERN_DEBUG " "); 61 if (i%4 == 0)
62 n += snprintf(buf+n, sizeof(buf)-n, " ");
63
64 if (i%16 == 0) {
65 dprintk(DEBUG_ERROR, "%s\n", buf);
66 n = 0;
67 }
62 } 68 }
63 printk(KERN_DEBUG "\n"); 69
70 dprintk(DEBUG_ERROR, "%s\n", buf);
64} 71}
65#else /* DEBUG_DUMP_PKT */ 72#else /* DEBUG_DUMP_PKT */
66static inline void dump_data(const unsigned char *data, unsigned int datalen) 73static inline void dump_data(const unsigned char *data, unsigned int datalen)
diff --git a/fs/9p/error.c b/fs/9p/error.c
index 834cb179e388..e4b6f8f38b6f 100644
--- a/fs/9p/error.c
+++ b/fs/9p/error.c
@@ -33,7 +33,6 @@
33 33
34#include <linux/list.h> 34#include <linux/list.h>
35#include <linux/jhash.h> 35#include <linux/jhash.h>
36#include <linux/string.h>
37 36
38#include "debug.h" 37#include "debug.h"
39#include "error.h" 38#include "error.h"
@@ -55,7 +54,8 @@ int v9fs_error_init(void)
55 54
56 /* load initial error map into hash table */ 55 /* load initial error map into hash table */
57 for (c = errmap; c->name != NULL; c++) { 56 for (c = errmap; c->name != NULL; c++) {
58 bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ; 57 c->namelen = strlen(c->name);
58 bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
59 INIT_HLIST_NODE(&c->list); 59 INIT_HLIST_NODE(&c->list);
60 hlist_add_head(&c->list, &hash_errmap[bucket]); 60 hlist_add_head(&c->list, &hash_errmap[bucket]);
61 } 61 }
@@ -69,15 +69,15 @@ int v9fs_error_init(void)
69 * 69 *
70 */ 70 */
71 71
72int v9fs_errstr2errno(char *errstr) 72int v9fs_errstr2errno(char *errstr, int len)
73{ 73{
74 int errno = 0; 74 int errno = 0;
75 struct hlist_node *p = NULL; 75 struct hlist_node *p = NULL;
76 struct errormap *c = NULL; 76 struct errormap *c = NULL;
77 int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ; 77 int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
78 78
79 hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { 79 hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
80 if (!strcmp(c->name, errstr)) { 80 if (c->namelen==len && !memcmp(c->name, errstr, len)) {
81 errno = c->val; 81 errno = c->val;
82 break; 82 break;
83 } 83 }
diff --git a/fs/9p/error.h b/fs/9p/error.h
index 78f89acf7c9a..a9794e85fe51 100644
--- a/fs/9p/error.h
+++ b/fs/9p/error.h
@@ -36,6 +36,7 @@ struct errormap {
36 char *name; 36 char *name;
37 int val; 37 int val;
38 38
39 int namelen;
39 struct hlist_node list; 40 struct hlist_node list;
40}; 41};
41 42
@@ -175,4 +176,3 @@ static struct errormap errmap[] = {
175}; 176};
176 177
177extern int v9fs_error_init(void); 178extern int v9fs_error_init(void);
178extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index d95f8626d170..eda449778fa5 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -31,9 +31,6 @@
31#include "v9fs.h" 31#include "v9fs.h"
32#include "9p.h" 32#include "9p.h"
33#include "v9fs_vfs.h" 33#include "v9fs_vfs.h"
34#include "transport.h"
35#include "mux.h"
36#include "conv.h"
37#include "fid.h" 34#include "fid.h"
38 35
39/** 36/**
@@ -164,7 +161,7 @@ static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry)
164 return v9fs_fid_create(dentry, v9ses, fidnum, 0); 161 return v9fs_fid_create(dentry, v9ses, fidnum, 0);
165 162
166clunk_fid: 163clunk_fid:
167 v9fs_t_clunk(v9ses, fidnum, NULL); 164 v9fs_t_clunk(v9ses, fidnum);
168 return ERR_PTR(err); 165 return ERR_PTR(err);
169} 166}
170 167
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 8835b576f744..945cb368d451 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -4,7 +4,7 @@
4 * Protocol Multiplexer 4 * Protocol Multiplexer
5 * 5 *
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> 7 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -28,448 +28,943 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/errno.h> 29#include <linux/errno.h>
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <linux/poll.h>
31#include <linux/kthread.h> 32#include <linux/kthread.h>
32#include <linux/idr.h> 33#include <linux/idr.h>
33 34
34#include "debug.h" 35#include "debug.h"
35#include "v9fs.h" 36#include "v9fs.h"
36#include "9p.h" 37#include "9p.h"
37#include "transport.h"
38#include "conv.h" 38#include "conv.h"
39#include "transport.h"
39#include "mux.h" 40#include "mux.h"
40 41
42#define ERREQFLUSH 1
43#define SCHED_TIMEOUT 10
44#define MAXPOLLWADDR 2
45
46enum {
47 Rworksched = 1, /* read work scheduled or running */
48 Rpending = 2, /* can read */
49 Wworksched = 4, /* write work scheduled or running */
50 Wpending = 8, /* can write */
51};
52
53struct v9fs_mux_poll_task;
54
55struct v9fs_req {
56 int tag;
57 struct v9fs_fcall *tcall;
58 struct v9fs_fcall *rcall;
59 int err;
60 v9fs_mux_req_callback cb;
61 void *cba;
62 struct list_head req_list;
63};
64
65struct v9fs_mux_data {
66 spinlock_t lock;
67 struct list_head mux_list;
68 struct v9fs_mux_poll_task *poll_task;
69 int msize;
70 unsigned char *extended;
71 struct v9fs_transport *trans;
72 struct v9fs_idpool tidpool;
73 int err;
74 wait_queue_head_t equeue;
75 struct list_head req_list;
76 struct list_head unsent_req_list;
77 struct v9fs_fcall *rcall;
78 int rpos;
79 char *rbuf;
80 int wpos;
81 int wsize;
82 char *wbuf;
83 wait_queue_t poll_wait[MAXPOLLWADDR];
84 wait_queue_head_t *poll_waddr[MAXPOLLWADDR];
85 poll_table pt;
86 struct work_struct rq;
87 struct work_struct wq;
88 unsigned long wsched;
89};
90
91struct v9fs_mux_poll_task {
92 struct task_struct *task;
93 struct list_head mux_list;
94 int muxnum;
95};
96
97struct v9fs_mux_rpc {
98 struct v9fs_mux_data *m;
99 struct v9fs_req *req;
100 int err;
101 struct v9fs_fcall *rcall;
102 wait_queue_head_t wqueue;
103};
104
105static int v9fs_poll_proc(void *);
106static void v9fs_read_work(void *);
107static void v9fs_write_work(void *);
108static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
109 poll_table * p);
110static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
111static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
112
113static DECLARE_MUTEX(v9fs_mux_task_lock);
114static struct workqueue_struct *v9fs_mux_wq;
115
116static int v9fs_mux_num;
117static int v9fs_mux_poll_task_num;
118static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
119
120int v9fs_mux_global_init(void)
121{
122 int i;
123
124 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++)
125 v9fs_mux_poll_tasks[i].task = NULL;
126
127 v9fs_mux_wq = create_workqueue("v9fs");
128 if (!v9fs_mux_wq)
129 return -ENOMEM;
130
131 return 0;
132}
133
134void v9fs_mux_global_exit(void)
135{
136 destroy_workqueue(v9fs_mux_wq);
137}
138
41/** 139/**
42 * dprintcond - print condition of session info 140 * v9fs_mux_calc_poll_procs - calculates the number of polling procs
43 * @v9ses: session info structure 141 * based on the number of mounted v9fs filesystems.
44 * @req: RPC request structure
45 * 142 *
143 * The current implementation returns sqrt of the number of mounts.
46 */ 144 */
145inline int v9fs_mux_calc_poll_procs(int muxnum)
146{
147 int n;
148
149 if (v9fs_mux_poll_task_num)
150 n = muxnum / v9fs_mux_poll_task_num +
151 (muxnum % v9fs_mux_poll_task_num ? 1 : 0);
152 else
153 n = 1;
154
155 if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
156 n = ARRAY_SIZE(v9fs_mux_poll_tasks);
157
158 return n;
159}
47 160
48static inline int 161static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
49dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
50{ 162{
51 dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status, 163 int i, n;
52 req->rcall); 164 struct v9fs_mux_poll_task *vpt, *vptlast;
165 struct task_struct *pproc;
166
167 dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
168 v9fs_mux_poll_task_num);
169 up(&v9fs_mux_task_lock);
170
171 n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
172 if (n > v9fs_mux_poll_task_num) {
173 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
174 if (v9fs_mux_poll_tasks[i].task == NULL) {
175 vpt = &v9fs_mux_poll_tasks[i];
176 dprintk(DEBUG_MUX, "create proc %p\n", vpt);
177 pproc = kthread_create(v9fs_poll_proc, vpt,
178 "v9fs-poll");
179
180 if (!IS_ERR(pproc)) {
181 vpt->task = pproc;
182 INIT_LIST_HEAD(&vpt->mux_list);
183 vpt->muxnum = 0;
184 v9fs_mux_poll_task_num++;
185 wake_up_process(vpt->task);
186 }
187 break;
188 }
189 }
190
191 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
192 dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
193 }
194
195 n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
196 ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
197
198 vptlast = NULL;
199 for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
200 vpt = &v9fs_mux_poll_tasks[i];
201 if (vpt->task != NULL) {
202 vptlast = vpt;
203 if (vpt->muxnum < n) {
204 dprintk(DEBUG_MUX, "put in proc %d\n", i);
205 list_add(&m->mux_list, &vpt->mux_list);
206 vpt->muxnum++;
207 m->poll_task = vpt;
208 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
209 init_poll_funcptr(&m->pt, v9fs_pollwait);
210 break;
211 }
212 }
213 }
214
215 if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
216 if (vptlast == NULL)
217 return -ENOMEM;
218
219 dprintk(DEBUG_MUX, "put in proc %d\n", i);
220 list_add(&m->mux_list, &vptlast->mux_list);
221 vptlast->muxnum++;
222 m->poll_task = vptlast;
223 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
224 init_poll_funcptr(&m->pt, v9fs_pollwait);
225 }
226
227 v9fs_mux_num++;
228 down(&v9fs_mux_task_lock);
229
53 return 0; 230 return 0;
54} 231}
55 232
233static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
234{
235 int i;
236 struct v9fs_mux_poll_task *vpt;
237
238 up(&v9fs_mux_task_lock);
239 vpt = m->poll_task;
240 list_del(&m->mux_list);
241 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
242 if (m->poll_waddr[i] != NULL) {
243 remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
244 m->poll_waddr[i] = NULL;
245 }
246 }
247 vpt->muxnum--;
248 if (!vpt->muxnum) {
249 dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
250 send_sig(SIGKILL, vpt->task, 1);
251 vpt->task = NULL;
252 v9fs_mux_poll_task_num--;
253 }
254 v9fs_mux_num--;
255 down(&v9fs_mux_task_lock);
256}
257
56/** 258/**
57 * xread - force read of a certain number of bytes 259 * v9fs_mux_init - allocate and initialize the per-session mux data
58 * @v9ses: session info structure 260 * Creates the polling task if this is the first session.
59 * @ptr: pointer to buffer
60 * @sz: number of bytes to read
61 * 261 *
62 * Chuck Cranor CS-533 project1 262 * @trans - transport structure
263 * @msize - maximum message size
264 * @extended - pointer to the extended flag
63 */ 265 */
64 266struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
65static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz) 267 unsigned char *extended)
66{ 268{
67 int rd = 0; 269 int i, n;
68 int ret = 0; 270 struct v9fs_mux_data *m, *mtmp;
69 while (rd < sz) { 271
70 ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd); 272 dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
71 if (ret <= 0) { 273 m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
72 dprintk(DEBUG_ERROR, "xread errno %d\n", ret); 274 if (!m)
73 return ret; 275 return ERR_PTR(-ENOMEM);
276
277 spin_lock_init(&m->lock);
278 INIT_LIST_HEAD(&m->mux_list);
279 m->msize = msize;
280 m->extended = extended;
281 m->trans = trans;
282 idr_init(&m->tidpool.pool);
283 init_MUTEX(&m->tidpool.lock);
284 m->err = 0;
285 init_waitqueue_head(&m->equeue);
286 INIT_LIST_HEAD(&m->req_list);
287 INIT_LIST_HEAD(&m->unsent_req_list);
288 m->rcall = NULL;
289 m->rpos = 0;
290 m->rbuf = NULL;
291 m->wpos = m->wsize = 0;
292 m->wbuf = NULL;
293 INIT_WORK(&m->rq, v9fs_read_work, m);
294 INIT_WORK(&m->wq, v9fs_write_work, m);
295 m->wsched = 0;
296 memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
297 m->poll_task = NULL;
298 n = v9fs_mux_poll_start(m);
299 if (n)
300 return ERR_PTR(n);
301
302 n = trans->poll(trans, &m->pt);
303 if (n & POLLIN) {
304 dprintk(DEBUG_MUX, "mux %p can read\n", m);
305 set_bit(Rpending, &m->wsched);
306 }
307
308 if (n & POLLOUT) {
309 dprintk(DEBUG_MUX, "mux %p can write\n", m);
310 set_bit(Wpending, &m->wsched);
311 }
312
313 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
314 if (IS_ERR(m->poll_waddr[i])) {
315 v9fs_mux_poll_stop(m);
316 mtmp = (void *)m->poll_waddr; /* the error code */
317 kfree(m);
318 m = mtmp;
319 break;
74 } 320 }
75 rd += ret;
76 ptr += ret;
77 } 321 }
78 return (rd); 322
323 return m;
79} 324}
80 325
81/** 326/**
82 * read_message - read a full 9P2000 fcall packet 327 * v9fs_mux_destroy - cancels all pending requests and frees mux resources
83 * @v9ses: session info structure
84 * @rcall: fcall structure to read into
85 * @rcalllen: size of fcall buffer
86 *
87 */ 328 */
329void v9fs_mux_destroy(struct v9fs_mux_data *m)
330{
331 dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
332 m->mux_list.prev, m->mux_list.next);
333 v9fs_mux_cancel(m, -ECONNRESET);
334
335 if (!list_empty(&m->req_list)) {
336 /* wait until all processes waiting on this session exit */
337 dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
338 m);
339 wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
340 dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
341 list_empty(&m->req_list));
342 }
343
344 v9fs_mux_poll_stop(m);
345 m->trans = NULL;
346
347 kfree(m);
348}
88 349
89static int 350/**
90read_message(struct v9fs_session_info *v9ses, 351 * v9fs_pollwait - called by files poll operation to add v9fs-poll task
91 struct v9fs_fcall *rcall, int rcalllen) 352 * to files wait queue
353 */
354static void
355v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
356 poll_table * p)
92{ 357{
93 unsigned char buf[4]; 358 int i;
94 void *data; 359 struct v9fs_mux_data *m;
95 int size = 0; 360
96 int res = 0; 361 m = container_of(p, struct v9fs_mux_data, pt);
97 362 for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
98 res = xread(v9ses, buf, sizeof(buf)); 363 if (m->poll_waddr[i] == NULL)
99 if (res < 0) { 364 break;
100 dprintk(DEBUG_ERROR, 365
101 "Reading of count field failed returned: %d\n", res); 366 if (i >= ARRAY_SIZE(m->poll_waddr)) {
102 return res; 367 dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
368 return;
103 } 369 }
104 370
105 if (res < 4) { 371 m->poll_waddr[i] = wait_address;
106 dprintk(DEBUG_ERROR, 372
107 "Reading of count field failed returned: %d\n", res); 373 if (!wait_address) {
108 return -EIO; 374 dprintk(DEBUG_ERROR, "no wait_address\n");
375 m->poll_waddr[i] = ERR_PTR(-EIO);
376 return;
109 } 377 }
110 378
111 size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); 379 init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
112 dprintk(DEBUG_MUX, "got a packet count: %d\n", size); 380 add_wait_queue(wait_address, &m->poll_wait[i]);
381}
382
383/**
384 * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
385 */
386static inline void v9fs_poll_mux(struct v9fs_mux_data *m)
387{
388 int n;
113 389
114 /* adjust for the four bytes of size */ 390 if (m->err < 0)
115 size -= 4; 391 return;
116 392
117 if (size > v9ses->maxdata) { 393 n = m->trans->poll(m->trans, NULL);
118 dprintk(DEBUG_ERROR, "packet too big: %d\n", size); 394 if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
119 return -E2BIG; 395 dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
396 if (n >= 0)
397 n = -ECONNRESET;
398 v9fs_mux_cancel(m, n);
120 } 399 }
121 400
122 data = kmalloc(size, GFP_KERNEL); 401 if (n & POLLIN) {
123 if (!data) { 402 set_bit(Rpending, &m->wsched);
124 eprintk(KERN_WARNING, "out of memory\n"); 403 dprintk(DEBUG_MUX, "mux %p can read\n", m);
125 return -ENOMEM; 404 if (!test_and_set_bit(Rworksched, &m->wsched)) {
405 dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
406 queue_work(v9fs_mux_wq, &m->rq);
407 }
126 } 408 }
127 409
128 res = xread(v9ses, data, size); 410 if (n & POLLOUT) {
129 if (res < size) { 411 set_bit(Wpending, &m->wsched);
130 dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n", 412 dprintk(DEBUG_MUX, "mux %p can write\n", m);
131 res); 413 if ((m->wsize || !list_empty(&m->unsent_req_list))
132 kfree(data); 414 && !test_and_set_bit(Wworksched, &m->wsched)) {
133 return res; 415 dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
416 queue_work(v9fs_mux_wq, &m->wq);
417 }
134 } 418 }
419}
420
421/**
422 * v9fs_poll_proc - polls all v9fs transports for new events and queues
423 * the appropriate work to the work queue
424 */
425static int v9fs_poll_proc(void *a)
426{
427 struct v9fs_mux_data *m, *mtmp;
428 struct v9fs_mux_poll_task *vpt;
135 429
136 /* we now have an in-memory string that is the reply. 430 vpt = a;
137 * deserialize it. There is very little to go wrong at this point 431 dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
138 * save for v9fs_alloc errors. 432 allow_signal(SIGKILL);
139 */ 433 while (!kthread_should_stop()) {
140 res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata, 434 set_current_state(TASK_INTERRUPTIBLE);
141 rcall, rcalllen); 435 if (signal_pending(current))
436 break;
142 437
143 kfree(data); 438 list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
439 v9fs_poll_mux(m);
440 }
144 441
145 if (res < 0) 442 dprintk(DEBUG_MUX, "sleeping...\n");
146 return res; 443 schedule_timeout(SCHED_TIMEOUT * HZ);
444 }
147 445
446 __set_current_state(TASK_RUNNING);
447 dprintk(DEBUG_MUX, "finish\n");
148 return 0; 448 return 0;
149} 449}
150 450
151/** 451/**
152 * v9fs_recv - receive an RPC response for a particular tag 452 * v9fs_write_work - called when a transport can send some data
153 * @v9ses: session info structure
154 * @req: RPC request structure
155 *
156 */ 453 */
157 454static void v9fs_write_work(void *a)
158static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
159{ 455{
160 int ret = 0; 456 int n, err;
457 struct v9fs_mux_data *m;
458 struct v9fs_req *req;
161 459
162 dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag); 460 m = a;
163 ret = wait_event_interruptible(v9ses->read_wait,
164 ((v9ses->transport->status != Connected) ||
165 (req->rcall != 0) || (req->err < 0) ||
166 dprintcond(v9ses, req)));
167 461
168 dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall); 462 if (m->err < 0) {
463 clear_bit(Wworksched, &m->wsched);
464 return;
465 }
169 466
170 spin_lock(&v9ses->muxlock); 467 if (!m->wsize) {
171 list_del(&req->next); 468 if (list_empty(&m->unsent_req_list)) {
172 spin_unlock(&v9ses->muxlock); 469 clear_bit(Wworksched, &m->wsched);
470 return;
471 }
173 472
174 if (req->err < 0) 473 spin_lock(&m->lock);
175 return req->err; 474 req =
475 list_entry(m->unsent_req_list.next, struct v9fs_req,
476 req_list);
477 list_move_tail(&req->req_list, &m->req_list);
478 m->wbuf = req->tcall->sdata;
479 m->wsize = req->tcall->size;
480 m->wpos = 0;
481 dump_data(m->wbuf, m->wsize);
482 spin_unlock(&m->lock);
483 }
176 484
177 if (v9ses->transport->status == Disconnected) 485 dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
178 return -ECONNRESET; 486 clear_bit(Wpending, &m->wsched);
487 err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
488 dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
489 if (err == -EAGAIN) {
490 clear_bit(Wworksched, &m->wsched);
491 return;
492 }
179 493
180 return ret; 494 if (err <= 0)
181} 495 goto error;
182 496
183/** 497 m->wpos += err;
184 * v9fs_send - send a 9P request 498 if (m->wpos == m->wsize)
185 * @v9ses: session info structure 499 m->wpos = m->wsize = 0;
186 * @req: RPC request to send 500
187 * 501 if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
188 */ 502 if (test_and_clear_bit(Wpending, &m->wsched))
503 n = POLLOUT;
504 else
505 n = m->trans->poll(m->trans, NULL);
506
507 if (n & POLLOUT) {
508 dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
509 queue_work(v9fs_mux_wq, &m->wq);
510 } else
511 clear_bit(Wworksched, &m->wsched);
512 } else
513 clear_bit(Wworksched, &m->wsched);
514
515 return;
189 516
190static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) 517 error:
518 v9fs_mux_cancel(m, err);
519 clear_bit(Wworksched, &m->wsched);
520}
521
522static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
191{ 523{
192 int ret = -1; 524 int ecode, tag;
193 void *data = NULL; 525 struct v9fs_str *ename;
194 struct v9fs_fcall *tcall = req->tcall;
195 526
196 data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); 527 tag = req->tag;
197 if (!data) 528 if (req->rcall->id == RERROR && !req->err) {
198 return -ENOMEM; 529 ecode = req->rcall->params.rerror.errno;
530 ename = &req->rcall->params.rerror.error;
199 531
200 tcall->size = 0; /* enforce size recalculation */ 532 dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
201 ret =
202 v9fs_serialize_fcall(v9ses, tcall, data,
203 v9ses->maxdata + V9FS_IOHDRSZ);
204 if (ret < 0)
205 goto free_data;
206 533
207 spin_lock(&v9ses->muxlock); 534 if (*m->extended)
208 list_add(&req->next, &v9ses->mux_fcalls); 535 req->err = -ecode;
209 spin_unlock(&v9ses->muxlock);
210 536
211 dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag, 537 if (!req->err) {
212 tcall->size); 538 req->err = v9fs_errstr2errno(ename->str, ename->len);
213 ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
214 539
215 if (ret != tcall->size) { 540 if (!req->err) { /* string match failed */
216 spin_lock(&v9ses->muxlock); 541 PRINT_FCALL_ERROR("unknown error", req->rcall);
217 list_del(&req->next); 542 }
218 kfree(req->rcall); 543
544 if (!req->err)
545 req->err = -ESERVERFAULT;
546 }
547 } else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
548 dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
549 req->tcall->id + 1, req->rcall->id);
550 if (!req->err)
551 req->err = -EIO;
552 }
219 553
220 spin_unlock(&v9ses->muxlock); 554 if (req->cb && req->err != ERREQFLUSH) {
221 if (ret >= 0) 555 dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
222 ret = -EREMOTEIO; 556 req->tcall, req->rcall);
557
558 (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
559 req->cb = NULL;
223 } else 560 } else
224 ret = 0; 561 kfree(req->rcall);
225 562
226 free_data: 563 v9fs_mux_put_tag(m, tag);
227 kfree(data); 564
228 return ret; 565 wake_up(&m->equeue);
566 kfree(req);
229} 567}
230 568
231/** 569/**
232 * v9fs_mux_rpc - send a request, receive a response 570 * v9fs_read_work - called when there is some data to be read from a transport
233 * @v9ses: session info structure
234 * @tcall: fcall to send
235 * @rcall: buffer to place response into
236 *
237 */ 571 */
238 572static void v9fs_read_work(void *a)
239long
240v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
241 struct v9fs_fcall **rcall)
242{ 573{
243 int tid = -1; 574 int n, err;
244 struct v9fs_fcall *fcall = NULL; 575 struct v9fs_mux_data *m;
245 struct v9fs_rpcreq req; 576 struct v9fs_req *req, *rptr, *rreq;
246 int ret = -1; 577 struct v9fs_fcall *rcall;
247 578 char *rbuf;
248 if (!v9ses) 579
249 return -EINVAL; 580 m = a;
250 581
251 if (!v9ses->transport || v9ses->transport->status != Connected) 582 if (m->err < 0)
252 return -EIO; 583 return;
584
585 rcall = NULL;
586 dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
587
588 if (!m->rcall) {
589 m->rcall =
590 kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
591 if (!m->rcall) {
592 err = -ENOMEM;
593 goto error;
594 }
253 595
254 if (rcall) 596 m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
255 *rcall = NULL; 597 m->rpos = 0;
598 }
256 599
257 if (tcall->id != TVERSION) { 600 clear_bit(Rpending, &m->wsched);
258 tid = v9fs_get_idpool(&v9ses->tidpool); 601 err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
259 if (tid < 0) 602 dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
260 return -ENOMEM; 603 if (err == -EAGAIN) {
604 clear_bit(Rworksched, &m->wsched);
605 return;
261 } 606 }
262 607
263 tcall->tag = tid; 608 if (err <= 0)
609 goto error;
264 610
265 req.tcall = tcall; 611 m->rpos += err;
266 req.err = 0; 612 while (m->rpos > 4) {
267 req.rcall = NULL; 613 n = le32_to_cpu(*(__le32 *) m->rbuf);
614 if (n >= m->msize) {
615 dprintk(DEBUG_ERROR,
616 "requested packet size too big: %d\n", n);
617 err = -EIO;
618 goto error;
619 }
268 620
269 ret = v9fs_send(v9ses, &req); 621 if (m->rpos < n)
622 break;
270 623
271 if (ret < 0) { 624 dump_data(m->rbuf, n);
272 if (tcall->id != TVERSION) 625 err =
273 v9fs_put_idpool(tid, &v9ses->tidpool); 626 v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
274 dprintk(DEBUG_MUX, "error %d\n", ret); 627 if (err < 0) {
275 return ret; 628 goto error;
276 } 629 }
630
631 rcall = m->rcall;
632 rbuf = m->rbuf;
633 if (m->rpos > n) {
634 m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
635 GFP_KERNEL);
636 if (!m->rcall) {
637 err = -ENOMEM;
638 goto error;
639 }
277 640
278 ret = v9fs_recv(v9ses, &req); 641 m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
279 642 memmove(m->rbuf, rbuf + n, m->rpos - n);
280 fcall = req.rcall; 643 m->rpos -= n;
281 644 } else {
282 dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret); 645 m->rcall = NULL;
283 if (ret == -ERESTARTSYS) { 646 m->rbuf = NULL;
284 if (v9ses->transport->status != Disconnected 647 m->rpos = 0;
285 && tcall->id != TFLUSH) {
286 unsigned long flags;
287
288 dprintk(DEBUG_MUX, "flushing the tag: %d\n",
289 tcall->tag);
290 clear_thread_flag(TIF_SIGPENDING);
291 v9fs_t_flush(v9ses, tcall->tag);
292 spin_lock_irqsave(&current->sighand->siglock, flags);
293 recalc_sigpending();
294 spin_unlock_irqrestore(&current->sighand->siglock,
295 flags);
296 dprintk(DEBUG_MUX, "flushing done\n");
297 } 648 }
298 649
299 goto release_req; 650 dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
300 } else if (ret < 0) 651 rcall->tag);
301 goto release_req; 652
302 653 req = NULL;
303 if (!fcall) 654 spin_lock(&m->lock);
304 ret = -EIO; 655 list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
305 else { 656 if (rreq->tag == rcall->tag) {
306 if (fcall->id == RERROR) { 657 req = rreq;
307 ret = v9fs_errstr2errno(fcall->params.rerror.error); 658 req->rcall = rcall;
308 if (ret == 0) { /* string match failed */ 659 list_del(&req->req_list);
309 if (fcall->params.rerror.errno) 660 spin_unlock(&m->lock);
310 ret = -(fcall->params.rerror.errno); 661 process_request(m, req);
311 else 662 break;
312 ret = -ESERVERFAULT;
313 } 663 }
314 } else if (fcall->id != tcall->id + 1) { 664
315 dprintk(DEBUG_ERROR, 665 }
316 "fcall mismatch: expected %d, got %d\n", 666
317 tcall->id + 1, fcall->id); 667 if (!req) {
318 ret = -EIO; 668 spin_unlock(&m->lock);
669 if (err >= 0 && rcall->id != RFLUSH)
670 dprintk(DEBUG_ERROR,
671 "unexpected response mux %p id %d tag %d\n",
672 m, rcall->id, rcall->tag);
673 kfree(rcall);
319 } 674 }
320 } 675 }
321 676
322 release_req: 677 if (!list_empty(&m->req_list)) {
323 if (tcall->id != TVERSION) 678 if (test_and_clear_bit(Rpending, &m->wsched))
324 v9fs_put_idpool(tid, &v9ses->tidpool); 679 n = POLLIN;
325 if (rcall) 680 else
326 *rcall = fcall; 681 n = m->trans->poll(m->trans, NULL);
327 else 682
328 kfree(fcall); 683 if (n & POLLIN) {
684 dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
685 queue_work(v9fs_mux_wq, &m->rq);
686 } else
687 clear_bit(Rworksched, &m->wsched);
688 } else
689 clear_bit(Rworksched, &m->wsched);
690
691 return;
329 692
330 return ret; 693 error:
694 v9fs_mux_cancel(m, err);
695 clear_bit(Rworksched, &m->wsched);
331} 696}
332 697
333/** 698/**
334 * v9fs_mux_cancel_requests - cancels all pending requests 699 * v9fs_send_request - send 9P request
700 * The function can sleep until the request is scheduled for sending.
701 * The function can be interrupted. Return from the function is not
702 * a guarantee that the request is sent succesfully. Can return errors
703 * that can be retrieved by PTR_ERR macros.
335 * 704 *
336 * @v9ses: session info structure 705 * @m: mux data
337 * @err: error code to return to the requests 706 * @tc: request to be sent
707 * @cb: callback function to call when response is received
708 * @cba: parameter to pass to the callback function
338 */ 709 */
339void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err) 710static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
711 struct v9fs_fcall *tc,
712 v9fs_mux_req_callback cb, void *cba)
340{ 713{
341 struct v9fs_rpcreq *rptr; 714 int n;
342 struct v9fs_rpcreq *rreq; 715 struct v9fs_req *req;
343 716
344 dprintk(DEBUG_MUX, " %d\n", err); 717 dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
345 spin_lock(&v9ses->muxlock); 718 tc, tc->id);
346 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { 719 if (m->err < 0)
347 rreq->err = err; 720 return ERR_PTR(m->err);
348 }
349 spin_unlock(&v9ses->muxlock);
350 wake_up_all(&v9ses->read_wait);
351}
352 721
353/** 722 req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
354 * v9fs_recvproc - kproc to handle demultiplexing responses 723 if (!req)
355 * @data: session info structure 724 return ERR_PTR(-ENOMEM);
356 *
357 */
358 725
359static int v9fs_recvproc(void *data) 726 if (tc->id == TVERSION)
360{ 727 n = V9FS_NOTAG;
361 struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data; 728 else
362 struct v9fs_fcall *rcall = NULL; 729 n = v9fs_mux_get_tag(m);
363 struct v9fs_rpcreq *rptr;
364 struct v9fs_rpcreq *req;
365 struct v9fs_rpcreq *rreq;
366 int err = 0;
367 730
368 allow_signal(SIGKILL); 731 if (n < 0)
369 set_current_state(TASK_INTERRUPTIBLE); 732 return ERR_PTR(-ENOMEM);
370 complete(&v9ses->proccmpl);
371 while (!kthread_should_stop() && err >= 0) {
372 req = rptr = rreq = NULL;
373
374 rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
375 if (!rcall) {
376 eprintk(KERN_ERR, "no memory for buffers\n");
377 break;
378 }
379 733
380 err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ); 734 v9fs_set_tag(tc, n);
381 spin_lock(&v9ses->muxlock);
382 if (err < 0) {
383 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
384 rreq->err = err;
385 }
386 if(err != -ERESTARTSYS)
387 eprintk(KERN_ERR,
388 "Transport error while reading message %d\n", err);
389 } else {
390 list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
391 if (rreq->tcall->tag == rcall->tag) {
392 req = rreq;
393 req->rcall = rcall;
394 break;
395 }
396 }
397 }
398 735
399 if (req && (req->tcall->id == TFLUSH)) { 736 req->tag = n;
400 struct v9fs_rpcreq *treq = NULL; 737 req->tcall = tc;
401 list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) { 738 req->rcall = NULL;
402 if (treq->tcall->tag == 739 req->err = 0;
403 req->tcall->params.tflush.oldtag) { 740 req->cb = cb;
404 list_del(&rptr->next); 741 req->cba = cba;
405 kfree(treq->rcall); 742
406 break; 743 spin_lock(&m->lock);
407 } 744 list_add_tail(&req->req_list, &m->unsent_req_list);
745 spin_unlock(&m->lock);
746
747 if (test_and_clear_bit(Wpending, &m->wsched))
748 n = POLLOUT;
749 else
750 n = m->trans->poll(m->trans, NULL);
751
752 if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
753 queue_work(v9fs_mux_wq, &m->wq);
754
755 return req;
756}
757
758static inline void
759v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc,
760 int err)
761{
762 v9fs_mux_req_callback cb;
763 int tag;
764 struct v9fs_mux_data *m;
765 struct v9fs_req *req, *rptr;
766
767 m = a;
768 dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc,
769 rc, err, tc->params.tflush.oldtag);
770
771 spin_lock(&m->lock);
772 cb = NULL;
773 tag = tc->params.tflush.oldtag;
774 list_for_each_entry_safe(req, rptr, &m->req_list, req_list) {
775 if (req->tag == tag) {
776 list_del(&req->req_list);
777 if (req->cb) {
778 cb = req->cb;
779 req->cb = NULL;
780 spin_unlock(&m->lock);
781 (*cb) (req->cba, req->tcall, req->rcall,
782 req->err);
408 } 783 }
784 kfree(req);
785 wake_up(&m->equeue);
786 break;
409 } 787 }
788 }
410 789
411 spin_unlock(&v9ses->muxlock); 790 if (!cb)
791 spin_unlock(&m->lock);
412 792
413 if (!req) { 793 v9fs_mux_put_tag(m, tag);
414 if (err >= 0) 794 kfree(tc);
415 dprintk(DEBUG_ERROR, 795 kfree(rc);
416 "unexpected response: id %d tag %d\n", 796}
417 rcall->id, rcall->tag);
418 797
419 kfree(rcall); 798static void
420 } 799v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
800{
801 struct v9fs_fcall *fc;
421 802
422 wake_up_all(&v9ses->read_wait); 803 dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
423 set_current_state(TASK_INTERRUPTIBLE); 804
805 fc = v9fs_create_tflush(req->tag);
806 v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
807}
808
809static void
810v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err)
811{
812 struct v9fs_mux_rpc *r;
813
814 if (err == ERREQFLUSH) {
815 dprintk(DEBUG_MUX, "err req flush\n");
816 return;
424 } 817 }
425 818
426 v9ses->transport->close(v9ses->transport); 819 r = a;
820 dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req,
821 tc, rc, err);
822 r->rcall = rc;
823 r->err = err;
824 wake_up(&r->wqueue);
825}
427 826
428 /* Inform all pending processes about the failure */ 827/**
429 wake_up_all(&v9ses->read_wait); 828 * v9fs_mux_rpc - sends 9P request and waits until a response is available.
829 * The function can be interrupted.
830 * @m: mux data
831 * @tc: request to be sent
832 * @rc: pointer where a pointer to the response is stored
833 */
834int
835v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
836 struct v9fs_fcall **rc)
837{
838 int err;
839 unsigned long flags;
840 struct v9fs_req *req;
841 struct v9fs_mux_rpc r;
842
843 r.err = 0;
844 r.rcall = NULL;
845 r.m = m;
846 init_waitqueue_head(&r.wqueue);
847
848 if (rc)
849 *rc = NULL;
850
851 req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
852 if (IS_ERR(req)) {
853 err = PTR_ERR(req);
854 dprintk(DEBUG_MUX, "error %d\n", err);
855 return PTR_ERR(req);
856 }
430 857
431 if (signal_pending(current)) 858 r.req = req;
432 complete(&v9ses->proccmpl); 859 dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc,
860 req->tag, &r, req);
861 err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
862 if (r.err < 0)
863 err = r.err;
864
865 if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
866 spin_lock(&m->lock);
867 req->tcall = NULL;
868 req->err = ERREQFLUSH;
869 spin_unlock(&m->lock);
870
871 clear_thread_flag(TIF_SIGPENDING);
872 v9fs_mux_flush_request(m, req);
873 spin_lock_irqsave(&current->sighand->siglock, flags);
874 recalc_sigpending();
875 spin_unlock_irqrestore(&current->sighand->siglock, flags);
876 }
433 877
434 dprintk(DEBUG_MUX, "recvproc: end\n"); 878 if (!err) {
435 v9ses->recvproc = NULL; 879 if (r.rcall)
880 dprintk(DEBUG_MUX, "got response id %d tag %d\n",
881 r.rcall->id, r.rcall->tag);
882
883 if (rc)
884 *rc = r.rcall;
885 else
886 kfree(r.rcall);
887 } else {
888 kfree(r.rcall);
889 dprintk(DEBUG_MUX, "got error %d\n", err);
890 if (err > 0)
891 err = -EIO;
892 }
436 893
437 return err >= 0; 894 return err;
438} 895}
439 896
440/** 897/**
441 * v9fs_mux_init - initialize multiplexer (spawn kproc) 898 * v9fs_mux_rpcnb - sends 9P request without waiting for response.
442 * @v9ses: session info structure 899 * @m: mux data
443 * @dev_name: mount device information (to create unique kproc) 900 * @tc: request to be sent
444 * 901 * @cb: callback function to be called when response arrives
902 * @a: value to pass to the callback function
445 */ 903 */
904int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
905 v9fs_mux_req_callback cb, void *a)
906{
907 int err;
908 struct v9fs_req *req;
909
910 req = v9fs_send_request(m, tc, cb, a);
911 if (IS_ERR(req)) {
912 err = PTR_ERR(req);
913 dprintk(DEBUG_MUX, "error %d\n", err);
914 return PTR_ERR(req);
915 }
916
917 dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
918 return 0;
919}
446 920
447int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name) 921/**
922 * v9fs_mux_cancel - cancel all pending requests with error
923 * @m: mux data
924 * @err: error code
925 */
926void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
448{ 927{
449 char procname[60]; 928 struct v9fs_req *req, *rtmp;
450 929 LIST_HEAD(cancel_list);
451 strncpy(procname, dev_name, sizeof(procname)); 930
452 procname[sizeof(procname) - 1] = 0; 931 dprintk(DEBUG_MUX, "mux %p err %d\n", m, err);
453 932 m->err = err;
454 init_waitqueue_head(&v9ses->read_wait); 933 spin_lock(&m->lock);
455 init_completion(&v9ses->fcread); 934 list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
456 init_completion(&v9ses->proccmpl); 935 list_move(&req->req_list, &cancel_list);
457 spin_lock_init(&v9ses->muxlock);
458 INIT_LIST_HEAD(&v9ses->mux_fcalls);
459 v9ses->recvproc = NULL;
460 v9ses->curfcall = NULL;
461
462 v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
463 "v9fs_recvproc %s", procname);
464
465 if (IS_ERR(v9ses->recvproc)) {
466 eprintk(KERN_ERR, "cannot create receiving thread\n");
467 v9fs_session_close(v9ses);
468 return -ECONNABORTED;
469 } 936 }
937 spin_unlock(&m->lock);
470 938
471 wake_up_process(v9ses->recvproc); 939 list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
472 wait_for_completion(&v9ses->proccmpl); 940 list_del(&req->req_list);
941 if (!req->err)
942 req->err = err;
473 943
474 return 0; 944 if (req->cb)
945 (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
946 else
947 kfree(req->rcall);
948
949 kfree(req);
950 }
951
952 wake_up(&m->equeue);
953}
954
955static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
956{
957 int tag;
958
959 tag = v9fs_get_idpool(&m->tidpool);
960 if (tag < 0)
961 return V9FS_NOTAG;
962 else
963 return (u16) tag;
964}
965
966static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
967{
968 if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool))
969 v9fs_put_idpool(tag, &m->tidpool);
475} 970}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
index 4994cb10badf..9473b84f24b2 100644
--- a/fs/9p/mux.h
+++ b/fs/9p/mux.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * Multiplexer Definitions 4 * Multiplexer Definitions
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -23,19 +24,35 @@
23 * 24 *
24 */ 25 */
25 26
26/* structure to manage each RPC transaction */ 27struct v9fs_mux_data;
27 28
28struct v9fs_rpcreq { 29/**
29 struct v9fs_fcall *tcall; 30 * v9fs_mux_req_callback - callback function that is called when the
30 struct v9fs_fcall *rcall; 31 * response of a request is received. The callback is called from
31 int err; /* error code if response failed */ 32 * a workqueue and shouldn't block.
33 *
34 * @a - the pointer that was specified when the request was sent to be
35 * passed to the callback
36 * @tc - request call
37 * @rc - response call
38 * @err - error code (non-zero if error occurred)
39 */
40typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc,
41 struct v9fs_fcall *rc, int err);
42
43int v9fs_mux_global_init(void);
44void v9fs_mux_global_exit(void);
32 45
33 /* XXX - could we put scatter/gather buffers here? */ 46struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
47 unsigned char *extended);
48void v9fs_mux_destroy(struct v9fs_mux_data *);
34 49
35 struct list_head next; 50int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
36}; 51struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
52int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
53int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
54 v9fs_mux_req_callback cb, void *a);
37 55
38int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name); 56void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
39long v9fs_mux_rpc(struct v9fs_session_info *v9ses, 57void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
40 struct v9fs_fcall *tcall, struct v9fs_fcall **rcall); 58int v9fs_errstr2errno(char *errstr, int len);
41void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
index 63b58ce98ff4..1a28ef97a3d1 100644
--- a/fs/9p/trans_fd.c
+++ b/fs/9p/trans_fd.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * File Descriptor Transport Layer 4 * File Descriptor Transport Layer
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -106,9 +107,6 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
106 return -ENOPROTOOPT; 107 return -ENOPROTOOPT;
107 } 108 }
108 109
109 sema_init(&trans->writelock, 1);
110 sema_init(&trans->readlock, 1);
111
112 ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL); 110 ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
113 111
114 if (!ts) 112 if (!ts)
@@ -148,12 +146,12 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
148 if (!trans) 146 if (!trans)
149 return; 147 return;
150 148
151 trans->status = Disconnected; 149 ts = xchg(&trans->priv, NULL);
152 ts = trans->priv;
153 150
154 if (!ts) 151 if (!ts)
155 return; 152 return;
156 153
154 trans->status = Disconnected;
157 if (ts->in_file) 155 if (ts->in_file)
158 fput(ts->in_file); 156 fput(ts->in_file);
159 157
@@ -163,10 +161,55 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
163 kfree(ts); 161 kfree(ts);
164} 162}
165 163
164static unsigned int
165v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
166{
167 int ret, n;
168 struct v9fs_trans_fd *ts;
169 mm_segment_t oldfs;
170
171 if (!trans)
172 return -EIO;
173
174 ts = trans->priv;
175 if (trans->status != Connected || !ts)
176 return -EIO;
177
178 oldfs = get_fs();
179 set_fs(get_ds());
180
181 if (!ts->in_file->f_op || !ts->in_file->f_op->poll) {
182 ret = -EIO;
183 goto end;
184 }
185
186 ret = ts->in_file->f_op->poll(ts->in_file, pt);
187
188 if (ts->out_file != ts->in_file) {
189 if (!ts->out_file->f_op || !ts->out_file->f_op->poll) {
190 ret = -EIO;
191 goto end;
192 }
193
194 n = ts->out_file->f_op->poll(ts->out_file, pt);
195
196 ret &= ~POLLOUT;
197 n &= ~POLLIN;
198
199 ret |= n;
200 }
201
202end:
203 set_fs(oldfs);
204 return ret;
205}
206
207
166struct v9fs_transport v9fs_trans_fd = { 208struct v9fs_transport v9fs_trans_fd = {
167 .init = v9fs_fd_init, 209 .init = v9fs_fd_init,
168 .write = v9fs_fd_send, 210 .write = v9fs_fd_send,
169 .read = v9fs_fd_recv, 211 .read = v9fs_fd_recv,
170 .close = v9fs_fd_close, 212 .close = v9fs_fd_close,
213 .poll = v9fs_fd_poll,
171}; 214};
172 215
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
index 6a9a75d40f73..44e830697acb 100644
--- a/fs/9p/trans_sock.c
+++ b/fs/9p/trans_sock.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Socket Transport Layer 4 * Socket Transport Layer
5 * 5 *
6 * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> 8 * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
8 * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de> 9 * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
@@ -36,6 +37,7 @@
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <linux/inet.h> 38#include <linux/inet.h>
38#include <linux/idr.h> 39#include <linux/idr.h>
40#include <linux/file.h>
39 41
40#include "debug.h" 42#include "debug.h"
41#include "v9fs.h" 43#include "v9fs.h"
@@ -45,6 +47,7 @@
45 47
46struct v9fs_trans_sock { 48struct v9fs_trans_sock {
47 struct socket *s; 49 struct socket *s;
50 struct file *filp;
48}; 51};
49 52
50/** 53/**
@@ -57,41 +60,26 @@ struct v9fs_trans_sock {
57 60
58static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) 61static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
59{ 62{
60 struct msghdr msg; 63 int ret;
61 struct kvec iov; 64 struct v9fs_trans_sock *ts;
62 int result;
63 mm_segment_t oldfs;
64 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
65 65
66 if (trans->status == Disconnected) 66 if (!trans || trans->status == Disconnected) {
67 dprintk(DEBUG_ERROR, "disconnected ...\n");
67 return -EREMOTEIO; 68 return -EREMOTEIO;
69 }
68 70
69 result = -EINVAL; 71 ts = trans->priv;
70
71 oldfs = get_fs();
72 set_fs(get_ds());
73
74 iov.iov_base = v;
75 iov.iov_len = len;
76 msg.msg_name = NULL;
77 msg.msg_namelen = 0;
78 msg.msg_iovlen = 1;
79 msg.msg_control = NULL;
80 msg.msg_controllen = 0;
81 msg.msg_namelen = 0;
82 msg.msg_flags = MSG_NOSIGNAL;
83 72
84 result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0); 73 if (!(ts->filp->f_flags & O_NONBLOCK))
74 dprintk(DEBUG_ERROR, "blocking read ...\n");
85 75
86 dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state); 76 ret = kernel_read(ts->filp, ts->filp->f_pos, v, len);
87 set_fs(oldfs); 77 if (ret <= 0) {
88 78 if (ret != -ERESTARTSYS && ret != -EAGAIN)
89 if (result <= 0) {
90 if (result != -ERESTARTSYS)
91 trans->status = Disconnected; 79 trans->status = Disconnected;
92 } 80 }
93 81
94 return result; 82 return ret;
95} 83}
96 84
97/** 85/**
@@ -104,40 +92,72 @@ static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
104 92
105static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len) 93static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
106{ 94{
107 struct kvec iov; 95 int ret;
108 struct msghdr msg;
109 int result = -1;
110 mm_segment_t oldfs; 96 mm_segment_t oldfs;
111 struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; 97 struct v9fs_trans_sock *ts;
112 98
113 dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len); 99 if (!trans || trans->status == Disconnected) {
114 dump_data(v, len); 100 dprintk(DEBUG_ERROR, "disconnected ...\n");
101 return -EREMOTEIO;
102 }
103
104 ts = trans->priv;
105 if (!ts) {
106 dprintk(DEBUG_ERROR, "no transport ...\n");
107 return -EREMOTEIO;
108 }
115 109
116 down(&trans->writelock); 110 if (!(ts->filp->f_flags & O_NONBLOCK))
111 dprintk(DEBUG_ERROR, "blocking write ...\n");
117 112
118 oldfs = get_fs(); 113 oldfs = get_fs();
119 set_fs(get_ds()); 114 set_fs(get_ds());
120 iov.iov_base = v; 115 ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos);
121 iov.iov_len = len;
122 msg.msg_name = NULL;
123 msg.msg_namelen = 0;
124 msg.msg_iovlen = 1;
125 msg.msg_control = NULL;
126 msg.msg_controllen = 0;
127 msg.msg_namelen = 0;
128 msg.msg_flags = MSG_NOSIGNAL;
129 result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
130 set_fs(oldfs); 116 set_fs(oldfs);
131 117
132 if (result < 0) { 118 if (ret < 0) {
133 if (result != -ERESTARTSYS) 119 if (ret != -ERESTARTSYS)
134 trans->status = Disconnected; 120 trans->status = Disconnected;
135 } 121 }
136 122
137 up(&trans->writelock); 123 return ret;
138 return result; 124}
125
126static unsigned int v9fs_sock_poll(struct v9fs_transport *trans,
127 struct poll_table_struct *pt) {
128
129 int ret;
130 struct v9fs_trans_sock *ts;
131 mm_segment_t oldfs;
132
133 if (!trans) {
134 dprintk(DEBUG_ERROR, "no transport\n");
135 return -EIO;
136 }
137
138 ts = trans->priv;
139 if (trans->status != Connected || !ts) {
140 dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status);
141 return -EIO;
142 }
143
144 oldfs = get_fs();
145 set_fs(get_ds());
146
147 if (!ts->filp->f_op || !ts->filp->f_op->poll) {
148 dprintk(DEBUG_ERROR, "no poll operation\n");
149 ret = -EIO;
150 goto end;
151 }
152
153 ret = ts->filp->f_op->poll(ts->filp, pt);
154
155end:
156 set_fs(oldfs);
157 return ret;
139} 158}
140 159
160
141/** 161/**
142 * v9fs_tcp_init - initialize TCP socket 162 * v9fs_tcp_init - initialize TCP socket
143 * @v9ses: session information 163 * @v9ses: session information
@@ -154,9 +174,9 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
154 int rc = 0; 174 int rc = 0;
155 struct v9fs_trans_sock *ts = NULL; 175 struct v9fs_trans_sock *ts = NULL;
156 struct v9fs_transport *trans = v9ses->transport; 176 struct v9fs_transport *trans = v9ses->transport;
177 int fd;
157 178
158 sema_init(&trans->writelock, 1); 179 trans->status = Disconnected;
159 sema_init(&trans->readlock, 1);
160 180
161 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); 181 ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
162 182
@@ -165,6 +185,7 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
165 185
166 trans->priv = ts; 186 trans->priv = ts;
167 ts->s = NULL; 187 ts->s = NULL;
188 ts->filp = NULL;
168 189
169 if (!addr) 190 if (!addr)
170 return -EINVAL; 191 return -EINVAL;
@@ -185,7 +206,18 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
185 return rc; 206 return rc;
186 } 207 }
187 csocket->sk->sk_allocation = GFP_NOIO; 208 csocket->sk->sk_allocation = GFP_NOIO;
209
210 fd = sock_map_fd(csocket);
211 if (fd < 0) {
212 sock_release(csocket);
213 kfree(ts);
214 trans->priv = NULL;
215 return fd;
216 }
217
188 ts->s = csocket; 218 ts->s = csocket;
219 ts->filp = fget(fd);
220 ts->filp->f_flags |= O_NONBLOCK;
189 trans->status = Connected; 221 trans->status = Connected;
190 222
191 return 0; 223 return 0;
@@ -203,7 +235,7 @@ static int
203v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, 235v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
204 char *data) 236 char *data)
205{ 237{
206 int rc; 238 int rc, fd;
207 struct socket *csocket; 239 struct socket *csocket;
208 struct sockaddr_un sun_server; 240 struct sockaddr_un sun_server;
209 struct v9fs_transport *trans; 241 struct v9fs_transport *trans;
@@ -213,6 +245,8 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
213 csocket = NULL; 245 csocket = NULL;
214 trans = v9ses->transport; 246 trans = v9ses->transport;
215 247
248 trans->status = Disconnected;
249
216 if (strlen(dev_name) > UNIX_PATH_MAX) { 250 if (strlen(dev_name) > UNIX_PATH_MAX) {
217 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", 251 eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
218 dev_name); 252 dev_name);
@@ -225,9 +259,7 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
225 259
226 trans->priv = ts; 260 trans->priv = ts;
227 ts->s = NULL; 261 ts->s = NULL;
228 262 ts->filp = NULL;
229 sema_init(&trans->writelock, 1);
230 sema_init(&trans->readlock, 1);
231 263
232 sun_server.sun_family = PF_UNIX; 264 sun_server.sun_family = PF_UNIX;
233 strcpy(sun_server.sun_path, dev_name); 265 strcpy(sun_server.sun_path, dev_name);
@@ -241,7 +273,18 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
241 return rc; 273 return rc;
242 } 274 }
243 csocket->sk->sk_allocation = GFP_NOIO; 275 csocket->sk->sk_allocation = GFP_NOIO;
276
277 fd = sock_map_fd(csocket);
278 if (fd < 0) {
279 sock_release(csocket);
280 kfree(ts);
281 trans->priv = NULL;
282 return fd;
283 }
284
244 ts->s = csocket; 285 ts->s = csocket;
286 ts->filp = fget(fd);
287 ts->filp->f_flags |= O_NONBLOCK;
245 trans->status = Connected; 288 trans->status = Connected;
246 289
247 return 0; 290 return 0;
@@ -262,12 +305,11 @@ static void v9fs_sock_close(struct v9fs_transport *trans)
262 305
263 ts = trans->priv; 306 ts = trans->priv;
264 307
265 if ((ts) && (ts->s)) { 308 if ((ts) && (ts->filp)) {
266 dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s); 309 fput(ts->filp);
267 sock_release(ts->s); 310 ts->filp = NULL;
268 ts->s = NULL; 311 ts->s = NULL;
269 trans->status = Disconnected; 312 trans->status = Disconnected;
270 dprintk(DEBUG_TRANS, "socket closed\n");
271 } 313 }
272 314
273 kfree(ts); 315 kfree(ts);
@@ -280,6 +322,7 @@ struct v9fs_transport v9fs_trans_tcp = {
280 .write = v9fs_sock_send, 322 .write = v9fs_sock_send,
281 .read = v9fs_sock_recv, 323 .read = v9fs_sock_recv,
282 .close = v9fs_sock_close, 324 .close = v9fs_sock_close,
325 .poll = v9fs_sock_poll,
283}; 326};
284 327
285struct v9fs_transport v9fs_trans_unix = { 328struct v9fs_transport v9fs_trans_unix = {
@@ -287,4 +330,5 @@ struct v9fs_transport v9fs_trans_unix = {
287 .write = v9fs_sock_send, 330 .write = v9fs_sock_send,
288 .read = v9fs_sock_recv, 331 .read = v9fs_sock_recv,
289 .close = v9fs_sock_close, 332 .close = v9fs_sock_close,
333 .poll = v9fs_sock_poll,
290}; 334};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
index 9e9cd418efd5..91fcdb94b361 100644
--- a/fs/9p/transport.h
+++ b/fs/9p/transport.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * Transport Definition 4 * Transport Definition
5 * 5 *
6 * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
6 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> 7 * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
@@ -31,14 +32,13 @@ enum v9fs_transport_status {
31 32
32struct v9fs_transport { 33struct v9fs_transport {
33 enum v9fs_transport_status status; 34 enum v9fs_transport_status status;
34 struct semaphore writelock;
35 struct semaphore readlock;
36 void *priv; 35 void *priv;
37 36
38 int (*init) (struct v9fs_session_info *, const char *, char *); 37 int (*init) (struct v9fs_session_info *, const char *, char *);
39 int (*write) (struct v9fs_transport *, void *, int); 38 int (*write) (struct v9fs_transport *, void *, int);
40 int (*read) (struct v9fs_transport *, void *, int); 39 int (*read) (struct v9fs_transport *, void *, int);
41 void (*close) (struct v9fs_transport *); 40 void (*close) (struct v9fs_transport *);
41 unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
42}; 42};
43 43
44extern struct v9fs_transport v9fs_trans_tcp; 44extern struct v9fs_transport v9fs_trans_tcp;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 418c3743fdee..5250c428fc1f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -37,7 +37,6 @@
37#include "v9fs_vfs.h" 37#include "v9fs_vfs.h"
38#include "transport.h" 38#include "transport.h"
39#include "mux.h" 39#include "mux.h"
40#include "conv.h"
41 40
42/* TODO: sysfs or debugfs interface */ 41/* TODO: sysfs or debugfs interface */
43int v9fs_debug_level = 0; /* feature-rific global debug level */ 42int v9fs_debug_level = 0; /* feature-rific global debug level */
@@ -213,7 +212,8 @@ retry:
213 return -1; 212 return -1;
214 } 213 }
215 214
216 error = idr_get_new(&p->pool, NULL, &i); 215 /* no need to store exactly p, we just need something non-null */
216 error = idr_get_new(&p->pool, p, &i);
217 up(&p->lock); 217 up(&p->lock);
218 218
219 if (error == -EAGAIN) 219 if (error == -EAGAIN)
@@ -243,6 +243,16 @@ void v9fs_put_idpool(int id, struct v9fs_idpool *p)
243} 243}
244 244
245/** 245/**
246 * v9fs_check_idpool - check if the specified id is currently allocated in the pool
247 * @id - id to check
248 * @p - pool
249 */
250int v9fs_check_idpool(int id, struct v9fs_idpool *p)
251{
252 return idr_find(&p->pool, id) != NULL;
253}
254
255/**
246 * v9fs_session_init - initialize session 256 * v9fs_session_init - initialize session
247 * @v9ses: session information structure 257 * @v9ses: session information structure
248 * @dev_name: device being mounted 258 * @dev_name: device being mounted
@@ -259,6 +269,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
259 int n = 0; 269 int n = 0;
260 int newfid = -1; 270 int newfid = -1;
261 int retval = -EINVAL; 271 int retval = -EINVAL;
272 struct v9fs_str *version;
262 273
263 v9ses->name = __getname(); 274 v9ses->name = __getname();
264 if (!v9ses->name) 275 if (!v9ses->name)
@@ -281,9 +292,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
281 /* id pools that are session-dependent: FIDs and TIDs */ 292 /* id pools that are session-dependent: FIDs and TIDs */
282 idr_init(&v9ses->fidpool.pool); 293 idr_init(&v9ses->fidpool.pool);
283 init_MUTEX(&v9ses->fidpool.lock); 294 init_MUTEX(&v9ses->fidpool.lock);
284 idr_init(&v9ses->tidpool.pool);
285 init_MUTEX(&v9ses->tidpool.lock);
286
287 295
288 switch (v9ses->proto) { 296 switch (v9ses->proto) {
289 case PROTO_TCP: 297 case PROTO_TCP:
@@ -320,7 +328,12 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
320 v9ses->shutdown = 0; 328 v9ses->shutdown = 0;
321 v9ses->session_hung = 0; 329 v9ses->session_hung = 0;
322 330
323 if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) { 331 v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ,
332 &v9ses->extended);
333
334 if (IS_ERR(v9ses->mux)) {
335 retval = PTR_ERR(v9ses->mux);
336 v9ses->mux = NULL;
324 dprintk(DEBUG_ERROR, "problem initializing mux\n"); 337 dprintk(DEBUG_ERROR, "problem initializing mux\n");
325 goto SessCleanUp; 338 goto SessCleanUp;
326 } 339 }
@@ -339,13 +352,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
339 goto FreeFcall; 352 goto FreeFcall;
340 } 353 }
341 354
342 /* Really should check for 9P1 and report error */ 355 version = &fcall->params.rversion.version;
343 if (!strcmp(fcall->params.rversion.version, "9P2000.u")) { 356 if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
344 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); 357 dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
345 v9ses->extended = 1; 358 v9ses->extended = 1;
346 } else { 359 } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
347 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); 360 dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
348 v9ses->extended = 0; 361 v9ses->extended = 0;
362 } else {
363 retval = -EREMOTEIO;
364 goto FreeFcall;
349 } 365 }
350 366
351 n = fcall->params.rversion.msize; 367 n = fcall->params.rversion.msize;
@@ -381,7 +397,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
381 } 397 }
382 398
383 if (v9ses->afid != ~0) { 399 if (v9ses->afid != ~0) {
384 if (v9fs_t_clunk(v9ses, v9ses->afid, NULL)) 400 if (v9fs_t_clunk(v9ses, v9ses->afid))
385 dprintk(DEBUG_ERROR, "clunk failed\n"); 401 dprintk(DEBUG_ERROR, "clunk failed\n");
386 } 402 }
387 403
@@ -403,13 +419,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
403 419
404void v9fs_session_close(struct v9fs_session_info *v9ses) 420void v9fs_session_close(struct v9fs_session_info *v9ses)
405{ 421{
406 if (v9ses->recvproc) { 422 if (v9ses->mux) {
407 send_sig(SIGKILL, v9ses->recvproc, 1); 423 v9fs_mux_destroy(v9ses->mux);
408 wait_for_completion(&v9ses->proccmpl); 424 v9ses->mux = NULL;
409 } 425 }
410 426
411 if (v9ses->transport) 427 if (v9ses->transport) {
412 v9ses->transport->close(v9ses->transport); 428 v9ses->transport->close(v9ses->transport);
429 kfree(v9ses->transport);
430 v9ses->transport = NULL;
431 }
413 432
414 __putname(v9ses->name); 433 __putname(v9ses->name);
415 __putname(v9ses->remotename); 434 __putname(v9ses->remotename);
@@ -420,8 +439,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
420 * and cancel all pending requests. 439 * and cancel all pending requests.
421 */ 440 */
422void v9fs_session_cancel(struct v9fs_session_info *v9ses) { 441void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
442 dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses);
423 v9ses->transport->status = Disconnected; 443 v9ses->transport->status = Disconnected;
424 v9fs_mux_cancel_requests(v9ses, -EIO); 444 v9fs_mux_cancel(v9ses->mux, -EIO);
425} 445}
426 446
427extern int v9fs_error_init(void); 447extern int v9fs_error_init(void);
@@ -433,11 +453,17 @@ extern int v9fs_error_init(void);
433 453
434static int __init init_v9fs(void) 454static int __init init_v9fs(void)
435{ 455{
456 int ret;
457
436 v9fs_error_init(); 458 v9fs_error_init();
437 459
438 printk(KERN_INFO "Installing v9fs 9P2000 file system support\n"); 460 printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
439 461
440 return register_filesystem(&v9fs_fs_type); 462 ret = v9fs_mux_global_init();
463 if (!ret)
464 ret = register_filesystem(&v9fs_fs_type);
465
466 return ret;
441} 467}
442 468
443/** 469/**
@@ -447,6 +473,7 @@ static int __init init_v9fs(void)
447 473
448static void __exit exit_v9fs(void) 474static void __exit exit_v9fs(void)
449{ 475{
476 v9fs_mux_global_exit();
450 unregister_filesystem(&v9fs_fs_type); 477 unregister_filesystem(&v9fs_fs_type);
451} 478}
452 479
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 45dcef42bdd6..f337da7a0eec 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -57,24 +57,14 @@ struct v9fs_session_info {
57 57
58 /* book keeping */ 58 /* book keeping */
59 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ 59 struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
60 struct v9fs_idpool tidpool; /* The TID pool for transactions ids */
61 60
62 /* transport information */
63 struct v9fs_transport *transport; 61 struct v9fs_transport *transport;
62 struct v9fs_mux_data *mux;
64 63
65 int inprogress; /* session in progress => true */ 64 int inprogress; /* session in progress => true */
66 int shutdown; /* session shutting down. no more attaches. */ 65 int shutdown; /* session shutting down. no more attaches. */
67 unsigned char session_hung; 66 unsigned char session_hung;
68 67 struct dentry *debugfs_dir;
69 /* mux private data */
70 struct v9fs_fcall *curfcall;
71 wait_queue_head_t read_wait;
72 struct completion fcread;
73 struct completion proccmpl;
74 struct task_struct *recvproc;
75
76 spinlock_t muxlock;
77 struct list_head mux_fcalls;
78}; 68};
79 69
80/* possible values of ->proto */ 70/* possible values of ->proto */
@@ -84,11 +74,14 @@ enum {
84 PROTO_FD, 74 PROTO_FD,
85}; 75};
86 76
77extern struct dentry *v9fs_debugfs_root;
78
87int v9fs_session_init(struct v9fs_session_info *, const char *, char *); 79int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
88struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); 80struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
89void v9fs_session_close(struct v9fs_session_info *v9ses); 81void v9fs_session_close(struct v9fs_session_info *v9ses);
90int v9fs_get_idpool(struct v9fs_idpool *p); 82int v9fs_get_idpool(struct v9fs_idpool *p);
91void v9fs_put_idpool(int id, struct v9fs_idpool *p); 83void v9fs_put_idpool(int id, struct v9fs_idpool *p);
84int v9fs_check_idpool(int id, struct v9fs_idpool *p);
92void v9fs_session_cancel(struct v9fs_session_info *v9ses); 85void v9fs_session_cancel(struct v9fs_session_info *v9ses);
93 86
94#define V9FS_MAGIC 0x01021997 87#define V9FS_MAGIC 0x01021997
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 2f2cea7ee3e7..69cf2905dc90 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -39,15 +39,15 @@
39 */ 39 */
40 40
41extern struct file_system_type v9fs_fs_type; 41extern struct file_system_type v9fs_fs_type;
42extern struct address_space_operations v9fs_addr_operations;
42extern struct file_operations v9fs_file_operations; 43extern struct file_operations v9fs_file_operations;
43extern struct file_operations v9fs_dir_operations; 44extern struct file_operations v9fs_dir_operations;
44extern struct dentry_operations v9fs_dentry_operations; 45extern struct dentry_operations v9fs_dentry_operations;
45 46
46struct inode *v9fs_get_inode(struct super_block *sb, int mode); 47struct inode *v9fs_get_inode(struct super_block *sb, int mode);
47ino_t v9fs_qid2ino(struct v9fs_qid *qid); 48ino_t v9fs_qid2ino(struct v9fs_qid *qid);
48void v9fs_mistat2inode(struct v9fs_stat *, struct inode *, 49void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *);
49 struct super_block *);
50int v9fs_dir_release(struct inode *inode, struct file *filp); 50int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file); 51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat); 52void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat);
53void v9fs_dentry_release(struct dentry *); 53void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
new file mode 100644
index 000000000000..8100fb5171b7
--- /dev/null
+++ b/fs/9p/vfs_addr.c
@@ -0,0 +1,109 @@
1/*
2 * linux/fs/9p/vfs_addr.c
3 *
4 * This file contains vfs address (mmap) ops for 9P2000.
5 *
6 * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
7 * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to:
21 * Free Software Foundation
22 * 51 Franklin Street, Fifth Floor
23 * Boston, MA 02111-1301 USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/errno.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/stat.h>
32#include <linux/string.h>
33#include <linux/smp_lock.h>
34#include <linux/inet.h>
35#include <linux/version.h>
36#include <linux/pagemap.h>
37#include <linux/idr.h>
38
39#include "debug.h"
40#include "v9fs.h"
41#include "9p.h"
42#include "v9fs_vfs.h"
43#include "fid.h"
44
45/**
46 * v9fs_vfs_readpage - read an entire page in from 9P
47 *
48 * @filp: file being read
49 * @page: page to fill with the data read from the file
50 *
51 */
52
53static int v9fs_vfs_readpage(struct file *filp, struct page *page)
54{
55 char *buffer = NULL;
56 int retval = -EIO;
57 loff_t offset = page_offset(page);
58 int count = PAGE_CACHE_SIZE;
59 struct inode *inode = filp->f_dentry->d_inode;
60 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
61 int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
62 struct v9fs_fid *v9f = filp->private_data;
63 struct v9fs_fcall *fcall = NULL;
64 int fid = v9f->fid;
65 int total = 0;
66 int result = 0;
67
68 buffer = kmap(page);
69 do {
70 if (count < rsize)
71 rsize = count;
72
73 result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
74
75 if (result < 0) {
76 printk(KERN_ERR "v9fs_t_read returned %d\n",
77 result);
78
79 kfree(fcall);
80 goto UnmapAndUnlock;
81 } else
82 offset += result;
83
84 memcpy(buffer, fcall->params.rread.data, result);
85
86 count -= result;
87 buffer += result;
88 total += result;
89
90 kfree(fcall);
91
92 if (result < rsize)
93 break;
94 } while (count);
95
96 memset(buffer, 0, count);
97 flush_dcache_page(page);
98 SetPageUptodate(page);
99 retval = 0;
100
101UnmapAndUnlock:
102 kunmap(page);
103 unlock_page(page);
104 return retval;
105}
106
107struct address_space_operations v9fs_addr_operations = {
108 .readpage = v9fs_vfs_readpage,
109};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index a6aa947de0f9..2dd806dac9f1 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -40,7 +40,6 @@
40#include "v9fs.h" 40#include "v9fs.h"
41#include "9p.h" 41#include "9p.h"
42#include "v9fs_vfs.h" 42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h" 43#include "fid.h"
45 44
46/** 45/**
@@ -95,24 +94,22 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
95 94
96void v9fs_dentry_release(struct dentry *dentry) 95void v9fs_dentry_release(struct dentry *dentry)
97{ 96{
97 int err;
98
98 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); 99 dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
99 100
100 if (dentry->d_fsdata != NULL) { 101 if (dentry->d_fsdata != NULL) {
101 struct list_head *fid_list = dentry->d_fsdata; 102 struct list_head *fid_list = dentry->d_fsdata;
102 struct v9fs_fid *temp = NULL; 103 struct v9fs_fid *temp = NULL;
103 struct v9fs_fid *current_fid = NULL; 104 struct v9fs_fid *current_fid = NULL;
104 struct v9fs_fcall *fcall = NULL;
105 105
106 list_for_each_entry_safe(current_fid, temp, fid_list, list) { 106 list_for_each_entry_safe(current_fid, temp, fid_list, list) {
107 if (v9fs_t_clunk 107 err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
108 (current_fid->v9ses, current_fid->fid, &fcall))
109 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
110 FCALL_ERROR(fcall));
111 108
112 v9fs_put_idpool(current_fid->fid, 109 if (err < 0)
113 &current_fid->v9ses->fidpool); 110 dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
111 err, dentry->d_iname);
114 112
115 kfree(fcall);
116 v9fs_fid_destroy(current_fid); 113 v9fs_fid_destroy(current_fid);
117 } 114 }
118 115
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 57a43b8feef5..ae6d032b9b59 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -37,8 +37,8 @@
37#include "debug.h" 37#include "debug.h"
38#include "v9fs.h" 38#include "v9fs.h"
39#include "9p.h" 39#include "9p.h"
40#include "v9fs_vfs.h"
41#include "conv.h" 40#include "conv.h"
41#include "v9fs_vfs.h"
42#include "fid.h" 42#include "fid.h"
43 43
44/** 44/**
@@ -74,20 +74,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
74 struct inode *inode = filp->f_dentry->d_inode; 74 struct inode *inode = filp->f_dentry->d_inode;
75 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); 75 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
76 struct v9fs_fid *file = filp->private_data; 76 struct v9fs_fid *file = filp->private_data;
77 unsigned int i, n; 77 unsigned int i, n, s;
78 int fid = -1; 78 int fid = -1;
79 int ret = 0; 79 int ret = 0;
80 struct v9fs_stat *mi = NULL; 80 struct v9fs_stat stat;
81 int over = 0; 81 int over = 0;
82 82
83 dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name); 83 dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
84 84
85 fid = file->fid; 85 fid = file->fid;
86 86
87 mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
88 if (!mi)
89 return -ENOMEM;
90
91 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { 87 if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
92 kfree(file->rdir_fcall); 88 kfree(file->rdir_fcall);
93 file->rdir_fcall = NULL; 89 file->rdir_fcall = NULL;
@@ -97,20 +93,20 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
97 n = file->rdir_fcall->params.rread.count; 93 n = file->rdir_fcall->params.rread.count;
98 i = file->rdir_fpos; 94 i = file->rdir_fpos;
99 while (i < n) { 95 while (i < n) {
100 int s = v9fs_deserialize_stat(v9ses, 96 s = v9fs_deserialize_stat(
101 file->rdir_fcall->params.rread.data + i, 97 file->rdir_fcall->params.rread.data + i,
102 n - i, mi, v9ses->maxdata); 98 n - i, &stat, v9ses->extended);
103 99
104 if (s == 0) { 100 if (s == 0) {
105 dprintk(DEBUG_ERROR, 101 dprintk(DEBUG_ERROR,
106 "error while deserializing mistat\n"); 102 "error while deserializing stat\n");
107 ret = -EIO; 103 ret = -EIO;
108 goto FreeStructs; 104 goto FreeStructs;
109 } 105 }
110 106
111 over = filldir(dirent, mi->name, strlen(mi->name), 107 over = filldir(dirent, stat.name.str, stat.name.len,
112 filp->f_pos, v9fs_qid2ino(&mi->qid), 108 filp->f_pos, v9fs_qid2ino(&stat.qid),
113 dt_type(mi)); 109 dt_type(&stat));
114 110
115 if (over) { 111 if (over) {
116 file->rdir_fpos = i; 112 file->rdir_fpos = i;
@@ -130,7 +126,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
130 126
131 while (!over) { 127 while (!over) {
132 ret = v9fs_t_read(v9ses, fid, filp->f_pos, 128 ret = v9fs_t_read(v9ses, fid, filp->f_pos,
133 v9ses->maxdata-V9FS_IOHDRSZ, &fcall); 129 v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
134 if (ret < 0) { 130 if (ret < 0) {
135 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", 131 dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
136 ret, fcall); 132 ret, fcall);
@@ -141,19 +137,18 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
141 n = ret; 137 n = ret;
142 i = 0; 138 i = 0;
143 while (i < n) { 139 while (i < n) {
144 int s = v9fs_deserialize_stat(v9ses, 140 s = v9fs_deserialize_stat(fcall->params.rread.data + i,
145 fcall->params.rread.data + i, n - i, mi, 141 n - i, &stat, v9ses->extended);
146 v9ses->maxdata);
147 142
148 if (s == 0) { 143 if (s == 0) {
149 dprintk(DEBUG_ERROR, 144 dprintk(DEBUG_ERROR,
150 "error while deserializing mistat\n"); 145 "error while deserializing stat\n");
151 return -EIO; 146 return -EIO;
152 } 147 }
153 148
154 over = filldir(dirent, mi->name, strlen(mi->name), 149 over = filldir(dirent, stat.name.str, stat.name.len,
155 filp->f_pos, v9fs_qid2ino(&mi->qid), 150 filp->f_pos, v9fs_qid2ino(&stat.qid),
156 dt_type(mi)); 151 dt_type(&stat));
157 152
158 if (over) { 153 if (over) {
159 file->rdir_fcall = fcall; 154 file->rdir_fcall = fcall;
@@ -172,7 +167,6 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
172 167
173 FreeStructs: 168 FreeStructs:
174 kfree(fcall); 169 kfree(fcall);
175 kfree(mi);
176 return ret; 170 return ret;
177} 171}
178 172
@@ -193,18 +187,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
193 fid->fid); 187 fid->fid);
194 fidnum = fid->fid; 188 fidnum = fid->fid;
195 189
196 filemap_fdatawrite(inode->i_mapping); 190 filemap_write_and_wait(inode->i_mapping);
197 filemap_fdatawait(inode->i_mapping);
198 191
199 if (fidnum >= 0) { 192 if (fidnum >= 0) {
200 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, 193 dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
201 fid->fid); 194 fid->fid);
202 195
203 if (v9fs_t_clunk(v9ses, fidnum, NULL)) 196 if (v9fs_t_clunk(v9ses, fidnum))
204 dprintk(DEBUG_ERROR, "clunk failed\n"); 197 dprintk(DEBUG_ERROR, "clunk failed\n");
205 198
206 v9fs_put_idpool(fid->fid, &v9ses->fidpool);
207
208 kfree(fid->rdir_fcall); 199 kfree(fid->rdir_fcall);
209 kfree(fid); 200 kfree(fid);
210 201
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 89c849da8504..c7e14d917215 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -32,6 +32,7 @@
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/smp_lock.h> 33#include <linux/smp_lock.h>
34#include <linux/inet.h> 34#include <linux/inet.h>
35#include <linux/version.h>
35#include <linux/list.h> 36#include <linux/list.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <linux/idr.h> 38#include <linux/idr.h>
@@ -117,9 +118,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
117 118
118 result = v9fs_t_open(v9ses, newfid, open_mode, &fcall); 119 result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
119 if (result < 0) { 120 if (result < 0) {
120 dprintk(DEBUG_ERROR, 121 PRINT_FCALL_ERROR("open failed", fcall);
121 "open failed, open_mode 0x%x: %s\n", open_mode,
122 FCALL_ERROR(fcall));
123 kfree(fcall); 122 kfree(fcall);
124 return result; 123 return result;
125 } 124 }
@@ -165,8 +164,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
165 return -ENOLCK; 164 return -ENOLCK;
166 165
167 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { 166 if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
168 filemap_fdatawrite(inode->i_mapping); 167 filemap_write_and_wait(inode->i_mapping);
169 filemap_fdatawait(inode->i_mapping);
170 invalidate_inode_pages(&inode->i_data); 168 invalidate_inode_pages(&inode->i_data);
171 } 169 }
172 170
@@ -257,7 +255,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
257 int result = -EIO; 255 int result = -EIO;
258 int rsize = 0; 256 int rsize = 0;
259 int total = 0; 257 int total = 0;
260 char *buf;
261 258
262 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, 259 dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
263 (int)*offset); 260 (int)*offset);
@@ -265,28 +262,14 @@ v9fs_file_write(struct file *filp, const char __user * data,
265 if (v9fid->iounit != 0 && rsize > v9fid->iounit) 262 if (v9fid->iounit != 0 && rsize > v9fid->iounit)
266 rsize = v9fid->iounit; 263 rsize = v9fid->iounit;
267 264
268 buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL);
269 if (!buf)
270 return -ENOMEM;
271
272 do { 265 do {
273 if (count < rsize) 266 if (count < rsize)
274 rsize = count; 267 rsize = count;
275 268
276 result = copy_from_user(buf, data, rsize); 269 result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
277 if (result) {
278 dprintk(DEBUG_ERROR, "Problem copying from user\n");
279 kfree(buf);
280 return -EFAULT;
281 }
282
283 dump_data(buf, rsize);
284 result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall);
285 if (result < 0) { 270 if (result < 0) {
286 eprintk(KERN_ERR, "error while writing: %s(%d)\n", 271 PRINT_FCALL_ERROR("error while writing", fcall);
287 FCALL_ERROR(fcall), result);
288 kfree(fcall); 272 kfree(fcall);
289 kfree(buf);
290 return result; 273 return result;
291 } else 274 } else
292 *offset += result; 275 *offset += result;
@@ -306,7 +289,9 @@ v9fs_file_write(struct file *filp, const char __user * data,
306 total += result; 289 total += result;
307 } while (count); 290 } while (count);
308 291
309 kfree(buf); 292 if(inode->i_mapping->nrpages)
293 invalidate_inode_pages2(inode->i_mapping);
294
310 return total; 295 return total;
311} 296}
312 297
@@ -317,4 +302,5 @@ struct file_operations v9fs_file_operations = {
317 .open = v9fs_file_open, 302 .open = v9fs_file_open,
318 .release = v9fs_dir_release, 303 .release = v9fs_dir_release,
319 .lock = v9fs_file_lock, 304 .lock = v9fs_file_lock,
305 .mmap = generic_file_mmap,
320}; 306};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 0ea965c3bb7d..91f552454c76 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -40,7 +40,6 @@
40#include "v9fs.h" 40#include "v9fs.h"
41#include "9p.h" 41#include "9p.h"
42#include "v9fs_vfs.h" 42#include "v9fs_vfs.h"
43#include "conv.h"
44#include "fid.h" 43#include "fid.h"
45 44
46static struct inode_operations v9fs_dir_inode_operations; 45static struct inode_operations v9fs_dir_inode_operations;
@@ -127,100 +126,32 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
127} 126}
128 127
129/** 128/**
130 * v9fs_blank_mistat - helper function to setup a 9P stat structure 129 * v9fs_blank_wstat - helper function to setup a 9P stat structure
131 * @v9ses: 9P session info (for determining extended mode) 130 * @v9ses: 9P session info (for determining extended mode)
132 * @mistat: structure to initialize 131 * @wstat: structure to initialize
133 * 132 *
134 */ 133 */
135 134
136static void 135static void
137v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat) 136v9fs_blank_wstat(struct v9fs_wstat *wstat)
138{ 137{
139 mistat->type = ~0; 138 wstat->type = ~0;
140 mistat->dev = ~0; 139 wstat->dev = ~0;
141 mistat->qid.type = ~0; 140 wstat->qid.type = ~0;
142 mistat->qid.version = ~0; 141 wstat->qid.version = ~0;
143 *((long long *)&mistat->qid.path) = ~0; 142 *((long long *)&wstat->qid.path) = ~0;
144 mistat->mode = ~0; 143 wstat->mode = ~0;
145 mistat->atime = ~0; 144 wstat->atime = ~0;
146 mistat->mtime = ~0; 145 wstat->mtime = ~0;
147 mistat->length = ~0; 146 wstat->length = ~0;
148 mistat->name = mistat->data; 147 wstat->name = NULL;
149 mistat->uid = mistat->data; 148 wstat->uid = NULL;
150 mistat->gid = mistat->data; 149 wstat->gid = NULL;
151 mistat->muid = mistat->data; 150 wstat->muid = NULL;
152 if (v9ses->extended) { 151 wstat->n_uid = ~0;
153 mistat->n_uid = ~0; 152 wstat->n_gid = ~0;
154 mistat->n_gid = ~0; 153 wstat->n_muid = ~0;
155 mistat->n_muid = ~0; 154 wstat->extension = NULL;
156 mistat->extension = mistat->data;
157 }
158 *mistat->data = 0;
159}
160
161/**
162 * v9fs_mistat2unix - convert mistat to unix stat
163 * @mistat: Plan 9 metadata (mistat) structure
164 * @buf: unix metadata (stat) structure to populate
165 * @sb: superblock
166 *
167 */
168
169static void
170v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
171 struct super_block *sb)
172{
173 struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
174
175 buf->st_nlink = 1;
176
177 buf->st_atime = mistat->atime;
178 buf->st_mtime = mistat->mtime;
179 buf->st_ctime = mistat->mtime;
180
181 buf->st_uid = (unsigned short)-1;
182 buf->st_gid = (unsigned short)-1;
183
184 if (v9ses && v9ses->extended) {
185 /* TODO: string to uid mapping via user-space daemon */
186 if (mistat->n_uid != -1)
187 sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
188
189 if (mistat->n_gid != -1)
190 sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
191 }
192
193 if (buf->st_uid == (unsigned short)-1)
194 buf->st_uid = v9ses->uid;
195 if (buf->st_gid == (unsigned short)-1)
196 buf->st_gid = v9ses->gid;
197
198 buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
199 if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
200 char type = 0;
201 int major = -1;
202 int minor = -1;
203 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
204 switch (type) {
205 case 'c':
206 buf->st_mode &= ~S_IFBLK;
207 buf->st_mode |= S_IFCHR;
208 break;
209 case 'b':
210 break;
211 default:
212 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
213 type, mistat->extension);
214 };
215 buf->st_rdev = MKDEV(major, minor);
216 } else
217 buf->st_rdev = 0;
218
219 buf->st_size = mistat->length;
220
221 buf->st_blksize = sb->s_blocksize;
222 buf->st_blocks =
223 (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
224} 155}
225 156
226/** 157/**
@@ -246,6 +177,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
246 inode->i_blocks = 0; 177 inode->i_blocks = 0;
247 inode->i_rdev = 0; 178 inode->i_rdev = 0;
248 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 179 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
180 inode->i_mapping->a_ops = &v9fs_addr_operations;
249 181
250 switch (mode & S_IFMT) { 182 switch (mode & S_IFMT) {
251 case S_IFIFO: 183 case S_IFIFO:
@@ -312,12 +244,12 @@ v9fs_create(struct inode *dir,
312 struct inode *file_inode = NULL; 244 struct inode *file_inode = NULL;
313 struct v9fs_fcall *fcall = NULL; 245 struct v9fs_fcall *fcall = NULL;
314 struct v9fs_qid qid; 246 struct v9fs_qid qid;
315 struct stat newstat;
316 int dirfidnum = -1; 247 int dirfidnum = -1;
317 long newfid = -1; 248 long newfid = -1;
318 int result = 0; 249 int result = 0;
319 unsigned int iounit = 0; 250 unsigned int iounit = 0;
320 int wfidno = -1; 251 int wfidno = -1;
252 int err;
321 253
322 perm = unixmode2p9mode(v9ses, perm); 254 perm = unixmode2p9mode(v9ses, perm);
323 255
@@ -349,57 +281,64 @@ v9fs_create(struct inode *dir,
349 281
350 result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall); 282 result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
351 if (result < 0) { 283 if (result < 0) {
352 dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); 284 PRINT_FCALL_ERROR("clone error", fcall);
353 v9fs_put_idpool(newfid, &v9ses->fidpool); 285 v9fs_put_idpool(newfid, &v9ses->fidpool);
354 newfid = -1; 286 newfid = -1;
355 goto CleanUpFid; 287 goto CleanUpFid;
356 } 288 }
357 289
358 kfree(fcall); 290 kfree(fcall);
291 fcall = NULL;
359 292
360 result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name, 293 result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
361 perm, open_mode, &fcall); 294 perm, open_mode, &fcall);
362 if (result < 0) { 295 if (result < 0) {
363 dprintk(DEBUG_ERROR, "create fails: %s(%d)\n", 296 PRINT_FCALL_ERROR("create fails", fcall);
364 FCALL_ERROR(fcall), result);
365
366 goto CleanUpFid; 297 goto CleanUpFid;
367 } 298 }
368 299
369 iounit = fcall->params.rcreate.iounit; 300 iounit = fcall->params.rcreate.iounit;
370 qid = fcall->params.rcreate.qid; 301 qid = fcall->params.rcreate.qid;
371 kfree(fcall); 302 kfree(fcall);
303 fcall = NULL;
372 304
373 fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1); 305 if (!(perm&V9FS_DMDIR)) {
374 dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate); 306 fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
375 if (!fid) { 307 dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
376 result = -ENOMEM; 308 if (!fid) {
377 goto CleanUpFid; 309 result = -ENOMEM;
378 } 310 goto CleanUpFid;
311 }
379 312
380 fid->qid = qid; 313 fid->qid = qid;
381 fid->iounit = iounit; 314 fid->iounit = iounit;
315 } else {
316 err = v9fs_t_clunk(v9ses, newfid);
317 newfid = -1;
318 if (err < 0)
319 dprintk(DEBUG_ERROR, "clunk for mkdir failed: %d\n", err);
320 }
382 321
383 /* walk to the newly created file and put the fid in the dentry */ 322 /* walk to the newly created file and put the fid in the dentry */
384 wfidno = v9fs_get_idpool(&v9ses->fidpool); 323 wfidno = v9fs_get_idpool(&v9ses->fidpool);
385 if (newfid < 0) { 324 if (wfidno < 0) {
386 eprintk(KERN_WARNING, "no free fids available\n"); 325 eprintk(KERN_WARNING, "no free fids available\n");
387 return -ENOSPC; 326 return -ENOSPC;
388 } 327 }
389 328
390 result = v9fs_t_walk(v9ses, dirfidnum, wfidno, 329 result = v9fs_t_walk(v9ses, dirfidnum, wfidno,
391 (char *) file_dentry->d_name.name, NULL); 330 (char *) file_dentry->d_name.name, &fcall);
392 if (result < 0) { 331 if (result < 0) {
393 dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); 332 PRINT_FCALL_ERROR("clone error", fcall);
394 v9fs_put_idpool(wfidno, &v9ses->fidpool); 333 v9fs_put_idpool(wfidno, &v9ses->fidpool);
395 wfidno = -1; 334 wfidno = -1;
396 goto CleanUpFid; 335 goto CleanUpFid;
397 } 336 }
337 kfree(fcall);
338 fcall = NULL;
398 339
399 if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) { 340 if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) {
400 if (!v9fs_t_clunk(v9ses, newfid, &fcall)) { 341 v9fs_put_idpool(wfidno, &v9ses->fidpool);
401 v9fs_put_idpool(wfidno, &v9ses->fidpool);
402 }
403 342
404 goto CleanUpFid; 343 goto CleanUpFid;
405 } 344 }
@@ -409,62 +348,43 @@ v9fs_create(struct inode *dir,
409 (perm & V9FS_DMDEVICE)) 348 (perm & V9FS_DMDEVICE))
410 return 0; 349 return 0;
411 350
412 result = v9fs_t_stat(v9ses, newfid, &fcall); 351 result = v9fs_t_stat(v9ses, wfidno, &fcall);
413 if (result < 0) { 352 if (result < 0) {
414 dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall), 353 PRINT_FCALL_ERROR("stat error", fcall);
415 result);
416 goto CleanUpFid; 354 goto CleanUpFid;
417 } 355 }
418 356
419 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
420 357
421 file_inode = v9fs_get_inode(sb, newstat.st_mode); 358 file_inode = v9fs_get_inode(sb,
359 p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode));
360
422 if ((!file_inode) || IS_ERR(file_inode)) { 361 if ((!file_inode) || IS_ERR(file_inode)) {
423 dprintk(DEBUG_ERROR, "create inode failed\n"); 362 dprintk(DEBUG_ERROR, "create inode failed\n");
424 result = -EBADF; 363 result = -EBADF;
425 goto CleanUpFid; 364 goto CleanUpFid;
426 } 365 }
427 366
428 v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb); 367 v9fs_stat2inode(&fcall->params.rstat.stat, file_inode, sb);
429 kfree(fcall); 368 kfree(fcall);
430 fcall = NULL; 369 fcall = NULL;
431 file_dentry->d_op = &v9fs_dentry_operations; 370 file_dentry->d_op = &v9fs_dentry_operations;
432 d_instantiate(file_dentry, file_inode); 371 d_instantiate(file_dentry, file_inode);
433 372
434 if (perm & V9FS_DMDIR) {
435 if (!v9fs_t_clunk(v9ses, newfid, &fcall))
436 v9fs_put_idpool(newfid, &v9ses->fidpool);
437 else
438 dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
439 FCALL_ERROR(fcall));
440 kfree(fcall);
441 fid->fidopen = 0;
442 fid->fidcreate = 0;
443 d_drop(file_dentry);
444 }
445
446 return 0; 373 return 0;
447 374
448 CleanUpFid: 375 CleanUpFid:
449 kfree(fcall); 376 kfree(fcall);
377 fcall = NULL;
450 378
451 if (newfid >= 0) { 379 if (newfid >= 0) {
452 if (!v9fs_t_clunk(v9ses, newfid, &fcall)) 380 err = v9fs_t_clunk(v9ses, newfid);
453 v9fs_put_idpool(newfid, &v9ses->fidpool); 381 if (err < 0)
454 else 382 dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
455 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
456 FCALL_ERROR(fcall));
457
458 kfree(fcall);
459 } 383 }
460 if (wfidno >= 0) { 384 if (wfidno >= 0) {
461 if (!v9fs_t_clunk(v9ses, wfidno, &fcall)) 385 err = v9fs_t_clunk(v9ses, wfidno);
462 v9fs_put_idpool(wfidno, &v9ses->fidpool); 386 if (err < 0)
463 else 387 dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
464 dprintk(DEBUG_ERROR, "clunk failed: %s\n",
465 FCALL_ERROR(fcall));
466
467 kfree(fcall);
468 } 388 }
469 return result; 389 return result;
470} 390}
@@ -509,10 +429,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
509 } 429 }
510 430
511 result = v9fs_t_remove(v9ses, fid, &fcall); 431 result = v9fs_t_remove(v9ses, fid, &fcall);
512 if (result < 0) 432 if (result < 0) {
513 dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n", 433 PRINT_FCALL_ERROR("remove fails", fcall);
514 FCALL_ERROR(fcall), result); 434 } else {
515 else {
516 v9fs_put_idpool(fid, &v9ses->fidpool); 435 v9fs_put_idpool(fid, &v9ses->fidpool);
517 v9fs_fid_destroy(v9fid); 436 v9fs_fid_destroy(v9fid);
518 } 437 }
@@ -567,7 +486,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
567 struct v9fs_fid *fid; 486 struct v9fs_fid *fid;
568 struct inode *inode; 487 struct inode *inode;
569 struct v9fs_fcall *fcall = NULL; 488 struct v9fs_fcall *fcall = NULL;
570 struct stat newstat;
571 int dirfidnum = -1; 489 int dirfidnum = -1;
572 int newfid = -1; 490 int newfid = -1;
573 int result = 0; 491 int result = 0;
@@ -620,8 +538,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
620 goto FreeFcall; 538 goto FreeFcall;
621 } 539 }
622 540
623 v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); 541 inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
624 inode = v9fs_get_inode(sb, newstat.st_mode); 542 fcall->params.rstat.stat.mode));
625 543
626 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { 544 if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
627 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", 545 eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
@@ -631,7 +549,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
631 goto FreeFcall; 549 goto FreeFcall;
632 } 550 }
633 551
634 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); 552 inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid);
635 553
636 fid = v9fs_fid_create(dentry, v9ses, newfid, 0); 554 fid = v9fs_fid_create(dentry, v9ses, newfid, 0);
637 if (fid == NULL) { 555 if (fid == NULL) {
@@ -640,10 +558,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
640 goto FreeFcall; 558 goto FreeFcall;
641 } 559 }
642 560
643 fid->qid = fcall->params.rstat.stat->qid; 561 fid->qid = fcall->params.rstat.stat.qid;
644 562
645 dentry->d_op = &v9fs_dentry_operations; 563 dentry->d_op = &v9fs_dentry_operations;
646 v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb); 564 v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
647 565
648 d_add(dentry, inode); 566 d_add(dentry, inode);
649 kfree(fcall); 567 kfree(fcall);
@@ -699,7 +617,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
699 v9fs_fid_lookup(old_dentry->d_parent); 617 v9fs_fid_lookup(old_dentry->d_parent);
700 struct v9fs_fid *newdirfid = 618 struct v9fs_fid *newdirfid =
701 v9fs_fid_lookup(new_dentry->d_parent); 619 v9fs_fid_lookup(new_dentry->d_parent);
702 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); 620 struct v9fs_wstat wstat;
703 struct v9fs_fcall *fcall = NULL; 621 struct v9fs_fcall *fcall = NULL;
704 int fid = -1; 622 int fid = -1;
705 int olddirfidnum = -1; 623 int olddirfidnum = -1;
@@ -708,9 +626,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
708 626
709 dprintk(DEBUG_VFS, "\n"); 627 dprintk(DEBUG_VFS, "\n");
710 628
711 if (!mistat)
712 return -ENOMEM;
713
714 if ((!oldfid) || (!olddirfid) || (!newdirfid)) { 629 if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
715 dprintk(DEBUG_ERROR, "problem with arguments\n"); 630 dprintk(DEBUG_ERROR, "problem with arguments\n");
716 return -EBADF; 631 return -EBADF;
@@ -734,33 +649,22 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
734 goto FreeFcallnBail; 649 goto FreeFcallnBail;
735 } 650 }
736 651
737 v9fs_blank_mistat(v9ses, mistat); 652 v9fs_blank_wstat(&wstat);
653 wstat.muid = v9ses->name;
654 wstat.name = (char *) new_dentry->d_name.name;
738 655
739 strcpy(mistat->data + 1, v9ses->name); 656 retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
740 mistat->name = mistat->data + 1 + strlen(v9ses->name);
741
742 if (new_dentry->d_name.len >
743 (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
744 dprintk(DEBUG_ERROR, "new name too long\n");
745 goto FreeFcallnBail;
746 }
747
748 strcpy(mistat->name, new_dentry->d_name.name);
749 retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
750 657
751 FreeFcallnBail: 658 FreeFcallnBail:
752 kfree(mistat);
753
754 if (retval < 0) 659 if (retval < 0)
755 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", 660 PRINT_FCALL_ERROR("wstat error", fcall);
756 FCALL_ERROR(fcall));
757 661
758 kfree(fcall); 662 kfree(fcall);
759 return retval; 663 return retval;
760} 664}
761 665
762/** 666/**
763 * v9fs_vfs_getattr - retreive file metadata 667 * v9fs_vfs_getattr - retrieve file metadata
764 * @mnt - mount information 668 * @mnt - mount information
765 * @dentry - file to get attributes on 669 * @dentry - file to get attributes on
766 * @stat - metadata structure to populate 670 * @stat - metadata structure to populate
@@ -788,7 +692,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
788 if (err < 0) 692 if (err < 0)
789 dprintk(DEBUG_ERROR, "stat error\n"); 693 dprintk(DEBUG_ERROR, "stat error\n");
790 else { 694 else {
791 v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode, 695 v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
792 dentry->d_inode->i_sb); 696 dentry->d_inode->i_sb);
793 generic_fillattr(dentry->d_inode, stat); 697 generic_fillattr(dentry->d_inode, stat);
794 } 698 }
@@ -809,57 +713,44 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
809 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); 713 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
810 struct v9fs_fid *fid = v9fs_fid_lookup(dentry); 714 struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
811 struct v9fs_fcall *fcall = NULL; 715 struct v9fs_fcall *fcall = NULL;
812 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); 716 struct v9fs_wstat wstat;
813 int res = -EPERM; 717 int res = -EPERM;
814 718
815 dprintk(DEBUG_VFS, "\n"); 719 dprintk(DEBUG_VFS, "\n");
816 720
817 if (!mistat)
818 return -ENOMEM;
819
820 if (!fid) { 721 if (!fid) {
821 dprintk(DEBUG_ERROR, 722 dprintk(DEBUG_ERROR,
822 "Couldn't find fid associated with dentry\n"); 723 "Couldn't find fid associated with dentry\n");
823 return -EBADF; 724 return -EBADF;
824 } 725 }
825 726
826 v9fs_blank_mistat(v9ses, mistat); 727 v9fs_blank_wstat(&wstat);
827 if (iattr->ia_valid & ATTR_MODE) 728 if (iattr->ia_valid & ATTR_MODE)
828 mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode); 729 wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode);
829 730
830 if (iattr->ia_valid & ATTR_MTIME) 731 if (iattr->ia_valid & ATTR_MTIME)
831 mistat->mtime = iattr->ia_mtime.tv_sec; 732 wstat.mtime = iattr->ia_mtime.tv_sec;
832 733
833 if (iattr->ia_valid & ATTR_ATIME) 734 if (iattr->ia_valid & ATTR_ATIME)
834 mistat->atime = iattr->ia_atime.tv_sec; 735 wstat.atime = iattr->ia_atime.tv_sec;
835 736
836 if (iattr->ia_valid & ATTR_SIZE) 737 if (iattr->ia_valid & ATTR_SIZE)
837 mistat->length = iattr->ia_size; 738 wstat.length = iattr->ia_size;
838 739
839 if (v9ses->extended) { 740 if (v9ses->extended) {
840 char *ptr = mistat->data+1; 741 if (iattr->ia_valid & ATTR_UID)
841 742 wstat.n_uid = iattr->ia_uid;
842 if (iattr->ia_valid & ATTR_UID) {
843 mistat->uid = ptr;
844 ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
845 mistat->n_uid = iattr->ia_uid;
846 }
847 743
848 if (iattr->ia_valid & ATTR_GID) { 744 if (iattr->ia_valid & ATTR_GID)
849 mistat->gid = ptr; 745 wstat.n_gid = iattr->ia_gid;
850 ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
851 mistat->n_gid = iattr->ia_gid;
852 }
853 } 746 }
854 747
855 res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall); 748 res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
856 749
857 if (res < 0) 750 if (res < 0)
858 dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall)); 751 PRINT_FCALL_ERROR("wstat error", fcall);
859 752
860 kfree(mistat);
861 kfree(fcall); 753 kfree(fcall);
862
863 if (res >= 0) 754 if (res >= 0)
864 res = inode_setattr(dentry->d_inode, iattr); 755 res = inode_setattr(dentry->d_inode, iattr);
865 756
@@ -867,51 +758,47 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
867} 758}
868 759
869/** 760/**
870 * v9fs_mistat2inode - populate an inode structure with mistat info 761 * v9fs_stat2inode - populate an inode structure with mistat info
871 * @mistat: Plan 9 metadata (mistat) structure 762 * @stat: Plan 9 metadata (mistat) structure
872 * @inode: inode to populate 763 * @inode: inode to populate
873 * @sb: superblock of filesystem 764 * @sb: superblock of filesystem
874 * 765 *
875 */ 766 */
876 767
877void 768void
878v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, 769v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
879 struct super_block *sb) 770 struct super_block *sb)
880{ 771{
772 int n;
773 char ext[32];
881 struct v9fs_session_info *v9ses = sb->s_fs_info; 774 struct v9fs_session_info *v9ses = sb->s_fs_info;
882 775
883 inode->i_nlink = 1; 776 inode->i_nlink = 1;
884 777
885 inode->i_atime.tv_sec = mistat->atime; 778 inode->i_atime.tv_sec = stat->atime;
886 inode->i_mtime.tv_sec = mistat->mtime; 779 inode->i_mtime.tv_sec = stat->mtime;
887 inode->i_ctime.tv_sec = mistat->mtime; 780 inode->i_ctime.tv_sec = stat->mtime;
888 781
889 inode->i_uid = -1; 782 inode->i_uid = v9ses->uid;
890 inode->i_gid = -1; 783 inode->i_gid = v9ses->gid;
891 784
892 if (v9ses->extended) { 785 if (v9ses->extended) {
893 /* TODO: string to uid mapping via user-space daemon */ 786 inode->i_uid = stat->n_uid;
894 inode->i_uid = mistat->n_uid; 787 inode->i_gid = stat->n_gid;
895 inode->i_gid = mistat->n_gid;
896
897 if (mistat->n_uid == -1)
898 sscanf(mistat->uid, "%x", &inode->i_uid);
899
900 if (mistat->n_gid == -1)
901 sscanf(mistat->gid, "%x", &inode->i_gid);
902 } 788 }
903 789
904 if (inode->i_uid == -1) 790 inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
905 inode->i_uid = v9ses->uid;
906 if (inode->i_gid == -1)
907 inode->i_gid = v9ses->gid;
908
909 inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
910 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { 791 if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
911 char type = 0; 792 char type = 0;
912 int major = -1; 793 int major = -1;
913 int minor = -1; 794 int minor = -1;
914 sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); 795
796 n = stat->extension.len;
797 if (n > sizeof(ext)-1)
798 n = sizeof(ext)-1;
799 memmove(ext, stat->extension.str, n);
800 ext[n] = 0;
801 sscanf(ext, "%c %u %u", &type, &major, &minor);
915 switch (type) { 802 switch (type) {
916 case 'c': 803 case 'c':
917 inode->i_mode &= ~S_IFBLK; 804 inode->i_mode &= ~S_IFBLK;
@@ -920,14 +807,14 @@ v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
920 case 'b': 807 case 'b':
921 break; 808 break;
922 default: 809 default:
923 dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", 810 dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n",
924 type, mistat->extension); 811 type, stat->extension.len, stat->extension.str);
925 }; 812 };
926 inode->i_rdev = MKDEV(major, minor); 813 inode->i_rdev = MKDEV(major, minor);
927 } else 814 } else
928 inode->i_rdev = 0; 815 inode->i_rdev = 0;
929 816
930 inode->i_size = mistat->length; 817 inode->i_size = stat->length;
931 818
932 inode->i_blksize = sb->s_blocksize; 819 inode->i_blksize = sb->s_blocksize;
933 inode->i_blocks = 820 inode->i_blocks =
@@ -955,71 +842,6 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
955} 842}
956 843
957/** 844/**
958 * v9fs_vfs_symlink - helper function to create symlinks
959 * @dir: directory inode containing symlink
960 * @dentry: dentry for symlink
961 * @symname: symlink data
962 *
963 * See 9P2000.u RFC for more information
964 *
965 */
966
967static int
968v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
969{
970 int retval = -EPERM;
971 struct v9fs_fid *newfid;
972 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
973 struct v9fs_fcall *fcall = NULL;
974 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
975
976 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
977 symname);
978
979 if (!mistat)
980 return -ENOMEM;
981
982 if (!v9ses->extended) {
983 dprintk(DEBUG_ERROR, "not extended\n");
984 goto FreeFcall;
985 }
986
987 /* issue a create */
988 retval = v9fs_create(dir, dentry, S_IFLNK, 0);
989 if (retval != 0)
990 goto FreeFcall;
991
992 newfid = v9fs_fid_lookup(dentry);
993
994 /* issue a twstat */
995 v9fs_blank_mistat(v9ses, mistat);
996 strcpy(mistat->data + 1, symname);
997 mistat->extension = mistat->data + 1;
998 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
999 if (retval < 0) {
1000 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1001 FCALL_ERROR(fcall));
1002 goto FreeFcall;
1003 }
1004
1005 kfree(fcall);
1006
1007 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1008 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1009 FCALL_ERROR(fcall));
1010 goto FreeFcall;
1011 }
1012
1013 d_drop(dentry); /* FID - will this also clunk? */
1014
1015 FreeFcall:
1016 kfree(mistat);
1017 kfree(fcall);
1018
1019 return retval;
1020}
1021
1022/**
1023 * v9fs_readlink - read a symlink's location (internal version) 845 * v9fs_readlink - read a symlink's location (internal version)
1024 * @dentry: dentry for symlink 846 * @dentry: dentry for symlink
1025 * @buffer: buffer to load symlink location into 847 * @buffer: buffer to load symlink location into
@@ -1058,16 +880,17 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1058 if (!fcall) 880 if (!fcall)
1059 return -EIO; 881 return -EIO;
1060 882
1061 if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) { 883 if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
1062 retval = -EINVAL; 884 retval = -EINVAL;
1063 goto FreeFcall; 885 goto FreeFcall;
1064 } 886 }
1065 887
1066 /* copy extension buffer into buffer */ 888 /* copy extension buffer into buffer */
1067 if (strlen(fcall->params.rstat.stat->extension) < buflen) 889 if (fcall->params.rstat.stat.extension.len < buflen)
1068 buflen = strlen(fcall->params.rstat.stat->extension); 890 buflen = fcall->params.rstat.stat.extension.len;
1069 891
1070 memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1); 892 memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
893 buffer[buflen-1] = 0;
1071 894
1072 retval = buflen; 895 retval = buflen;
1073 896
@@ -1157,6 +980,77 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1157 __putname(s); 980 __putname(s);
1158} 981}
1159 982
983static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
984 int mode, const char *extension)
985{
986 int err, retval;
987 struct v9fs_session_info *v9ses;
988 struct v9fs_fcall *fcall;
989 struct v9fs_fid *fid;
990 struct v9fs_wstat wstat;
991
992 v9ses = v9fs_inode2v9ses(dir);
993 retval = -EPERM;
994 fcall = NULL;
995
996 if (!v9ses->extended) {
997 dprintk(DEBUG_ERROR, "not extended\n");
998 goto free_mem;
999 }
1000
1001 /* issue a create */
1002 retval = v9fs_create(dir, dentry, mode, 0);
1003 if (retval != 0)
1004 goto free_mem;
1005
1006 fid = v9fs_fid_get_created(dentry);
1007 if (!fid) {
1008 dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
1009 goto free_mem;
1010 }
1011
1012 /* issue a Twstat */
1013 v9fs_blank_wstat(&wstat);
1014 wstat.muid = v9ses->name;
1015 wstat.extension = (char *) extension;
1016 retval = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
1017 if (retval < 0) {
1018 PRINT_FCALL_ERROR("wstat error", fcall);
1019 goto free_mem;
1020 }
1021
1022 err = v9fs_t_clunk(v9ses, fid->fid);
1023 if (err < 0) {
1024 dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
1025 goto free_mem;
1026 }
1027
1028 d_drop(dentry); /* FID - will this also clunk? */
1029
1030free_mem:
1031 kfree(fcall);
1032 return retval;
1033}
1034
1035/**
1036 * v9fs_vfs_symlink - helper function to create symlinks
1037 * @dir: directory inode containing symlink
1038 * @dentry: dentry for symlink
1039 * @symname: symlink data
1040 *
1041 * See 9P2000.u RFC for more information
1042 *
1043 */
1044
1045static int
1046v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1047{
1048 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1049 symname);
1050
1051 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
1052}
1053
1160/** 1054/**
1161 * v9fs_vfs_link - create a hardlink 1055 * v9fs_vfs_link - create a hardlink
1162 * @old_dentry: dentry for file to link to 1056 * @old_dentry: dentry for file to link to
@@ -1173,64 +1067,24 @@ static int
1173v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, 1067v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1174 struct dentry *dentry) 1068 struct dentry *dentry)
1175{ 1069{
1176 int retval = -EPERM; 1070 int retval;
1177 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); 1071 struct v9fs_fid *oldfid;
1178 struct v9fs_fcall *fcall = NULL; 1072 char *name;
1179 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1180 struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
1181 struct v9fs_fid *newfid = NULL;
1182 char *symname = __getname();
1183 1073
1184 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1074 dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
1185 old_dentry->d_name.name); 1075 old_dentry->d_name.name);
1186 1076
1187 if (!v9ses->extended) { 1077 oldfid = v9fs_fid_lookup(old_dentry);
1188 dprintk(DEBUG_ERROR, "not extended\n"); 1078 if (!oldfid) {
1189 goto FreeMem; 1079 dprintk(DEBUG_ERROR, "can't find oldfid\n");
1190 } 1080 return -EPERM;
1191
1192 /* get fid of old_dentry */
1193 sprintf(symname, "hardlink(%d)\n", oldfid->fid);
1194
1195 /* issue a create */
1196 retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
1197 if (retval != 0)
1198 goto FreeMem;
1199
1200 newfid = v9fs_fid_lookup(dentry);
1201 if (!newfid) {
1202 dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
1203 goto FreeMem;
1204 }
1205
1206 /* issue a twstat */
1207 v9fs_blank_mistat(v9ses, mistat);
1208 strcpy(mistat->data + 1, symname);
1209 mistat->extension = mistat->data + 1;
1210 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1211 if (retval < 0) {
1212 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1213 FCALL_ERROR(fcall));
1214 goto FreeMem;
1215 }
1216
1217 kfree(fcall);
1218
1219 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1220 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1221 FCALL_ERROR(fcall));
1222 goto FreeMem;
1223 } 1081 }
1224 1082
1225 d_drop(dentry); /* FID - will this also clunk? */ 1083 name = __getname();
1226 1084 sprintf(name, "hardlink(%d)\n", oldfid->fid);
1227 kfree(fcall); 1085 retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
1228 fcall = NULL; 1086 __putname(name);
1229 1087
1230 FreeMem:
1231 kfree(mistat);
1232 kfree(fcall);
1233 __putname(symname);
1234 return retval; 1088 return retval;
1235} 1089}
1236 1090
@@ -1246,82 +1100,30 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1246static int 1100static int
1247v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 1101v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1248{ 1102{
1249 int retval = -EPERM; 1103 int retval;
1250 struct v9fs_fid *newfid; 1104 char *name;
1251 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1252 struct v9fs_fcall *fcall = NULL;
1253 struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
1254 char *symname = __getname();
1255 1105
1256 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1106 dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
1257 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1107 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1258 1108
1259 if (!mistat) 1109 if (!new_valid_dev(rdev))
1260 return -ENOMEM; 1110 return -EINVAL;
1261
1262 if (!new_valid_dev(rdev)) {
1263 retval = -EINVAL;
1264 goto FreeMem;
1265 }
1266
1267 if (!v9ses->extended) {
1268 dprintk(DEBUG_ERROR, "not extended\n");
1269 goto FreeMem;
1270 }
1271
1272 /* issue a create */
1273 retval = v9fs_create(dir, dentry, mode, 0);
1274
1275 if (retval != 0)
1276 goto FreeMem;
1277
1278 newfid = v9fs_fid_lookup(dentry);
1279 if (!newfid) {
1280 dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
1281 retval = -EINVAL;
1282 goto FreeMem;
1283 }
1284 1111
1112 name = __getname();
1285 /* build extension */ 1113 /* build extension */
1286 if (S_ISBLK(mode)) 1114 if (S_ISBLK(mode))
1287 sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev)); 1115 sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
1288 else if (S_ISCHR(mode)) 1116 else if (S_ISCHR(mode))
1289 sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev)); 1117 sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
1290 else if (S_ISFIFO(mode)) 1118 else if (S_ISFIFO(mode))
1291 ; /* DO NOTHING */ 1119 *name = 0;
1292 else { 1120 else {
1293 retval = -EINVAL; 1121 __putname(name);
1294 goto FreeMem; 1122 return -EINVAL;
1295 }
1296
1297 if (!S_ISFIFO(mode)) {
1298 /* issue a twstat */
1299 v9fs_blank_mistat(v9ses, mistat);
1300 strcpy(mistat->data + 1, symname);
1301 mistat->extension = mistat->data + 1;
1302 retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
1303 if (retval < 0) {
1304 dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
1305 FCALL_ERROR(fcall));
1306 goto FreeMem;
1307 }
1308 } 1123 }
1309 1124
1310 /* need to update dcache so we show up */ 1125 retval = v9fs_vfs_mkspecial(dir, dentry, mode, name);
1311 kfree(fcall); 1126 __putname(name);
1312
1313 if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
1314 dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
1315 FCALL_ERROR(fcall));
1316 goto FreeMem;
1317 }
1318
1319 d_drop(dentry); /* FID - will this also clunk? */
1320
1321 FreeMem:
1322 kfree(mistat);
1323 kfree(fcall);
1324 __putname(symname);
1325 1127
1326 return retval; 1128 return retval;
1327} 1129}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 82c5b0084079..2c4fa75be025 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -44,7 +44,6 @@
44#include "v9fs.h" 44#include "v9fs.h"
45#include "9p.h" 45#include "9p.h"
46#include "v9fs_vfs.h" 46#include "v9fs_vfs.h"
47#include "conv.h"
48#include "fid.h" 47#include "fid.h"
49 48
50static void v9fs_clear_inode(struct inode *); 49static void v9fs_clear_inode(struct inode *);
@@ -92,7 +91,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
92 sb->s_op = &v9fs_super_ops; 91 sb->s_op = &v9fs_super_ops;
93 92
94 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 93 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
95 MS_NODIRATIME | MS_NOATIME; 94 MS_NOATIME;
96} 95}
97 96
98/** 97/**
@@ -123,12 +122,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
123 122
124 dprintk(DEBUG_VFS, " \n"); 123 dprintk(DEBUG_VFS, " \n");
125 124
126 v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL); 125 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
127 if (!v9ses) 126 if (!v9ses)
128 return ERR_PTR(-ENOMEM); 127 return ERR_PTR(-ENOMEM);
129 128
130 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 129 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
131 dprintk(DEBUG_ERROR, "problem initiating session\n"); 130 dprintk(DEBUG_ERROR, "problem initiating session\n");
131 kfree(v9ses);
132 return ERR_PTR(newfid); 132 return ERR_PTR(newfid);
133 } 133 }
134 134
@@ -157,7 +157,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
157 stat_result = v9fs_t_stat(v9ses, newfid, &fcall); 157 stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
158 if (stat_result < 0) { 158 if (stat_result < 0) {
159 dprintk(DEBUG_ERROR, "stat error\n"); 159 dprintk(DEBUG_ERROR, "stat error\n");
160 v9fs_t_clunk(v9ses, newfid, NULL); 160 v9fs_t_clunk(v9ses, newfid);
161 v9fs_put_idpool(newfid, &v9ses->fidpool); 161 v9fs_put_idpool(newfid, &v9ses->fidpool);
162 } else { 162 } else {
163 /* Setup the Root Inode */ 163 /* Setup the Root Inode */
@@ -167,10 +167,10 @@ static struct super_block *v9fs_get_sb(struct file_system_type
167 goto put_back_sb; 167 goto put_back_sb;
168 } 168 }
169 169
170 root_fid->qid = fcall->params.rstat.stat->qid; 170 root_fid->qid = fcall->params.rstat.stat.qid;
171 root->d_inode->i_ino = 171 root->d_inode->i_ino =
172 v9fs_qid2ino(&fcall->params.rstat.stat->qid); 172 v9fs_qid2ino(&fcall->params.rstat.stat.qid);
173 v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb); 173 v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
174 } 174 }
175 175
176 kfree(fcall); 176 kfree(fcall);
diff --git a/fs/Kconfig b/fs/Kconfig
index 382e3b2883d5..ef78e3a42d32 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -798,7 +798,7 @@ config PROC_KCORE
798 798
799config PROC_VMCORE 799config PROC_VMCORE
800 bool "/proc/vmcore support (EXPERIMENTAL)" 800 bool "/proc/vmcore support (EXPERIMENTAL)"
801 depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP 801 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
802 help 802 help
803 Exports the dump image of crashed kernel in ELF format. 803 Exports the dump image of crashed kernel in ELF format.
804 804
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 175b2e8177c1..f3d3d81eb7e9 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,6 @@
1config BINFMT_ELF 1config BINFMT_ELF
2 bool "Kernel support for ELF binaries" 2 bool "Kernel support for ELF binaries"
3 depends on MMU 3 depends on MMU && (BROKEN || !FRV)
4 default y 4 default y
5 ---help--- 5 ---help---
6 ELF (Executable and Linkable Format) is a format for libraries and 6 ELF (Executable and Linkable Format) is a format for libraries and
diff --git a/fs/Makefile b/fs/Makefile
index 73676111ebbe..1db711319c80 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,11 +10,11 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
13 ioprio.o pnode.o 13 ioprio.o pnode.o drop_caches.o
14 14
15obj-$(CONFIG_INOTIFY) += inotify.o 15obj-$(CONFIG_INOTIFY) += inotify.o
16obj-$(CONFIG_EPOLL) += eventpoll.o 16obj-$(CONFIG_EPOLL) += eventpoll.o
17obj-$(CONFIG_COMPAT) += compat.o 17obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
18 18
19nfsd-$(CONFIG_NFSD) := nfsctl.o 19nfsd-$(CONFIG_NFSD) := nfsctl.o
20obj-y += $(nfsd-y) $(nfsd-m) 20obj-y += $(nfsd-y) $(nfsd-m)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 9ebe881c6786..44d439cb69f4 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -244,10 +244,10 @@ affs_put_inode(struct inode *inode)
244 pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 244 pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
245 affs_free_prealloc(inode); 245 affs_free_prealloc(inode);
246 if (atomic_read(&inode->i_count) == 1) { 246 if (atomic_read(&inode->i_count) == 1) {
247 down(&inode->i_sem); 247 mutex_lock(&inode->i_mutex);
248 if (inode->i_size != AFFS_I(inode)->mmu_private) 248 if (inode->i_size != AFFS_I(inode)->mmu_private)
249 affs_truncate(inode); 249 affs_truncate(inode);
250 up(&inode->i_sem); 250 mutex_unlock(&inode->i_mutex);
251 } 251 }
252} 252}
253 253
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 0a57fd7c726f..9eef6bf156ab 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -118,7 +118,7 @@ static int kafscmd(void *arg)
118 _SRXAFSCM_xxxx_t func; 118 _SRXAFSCM_xxxx_t func;
119 int die; 119 int die;
120 120
121 printk("kAFS: Started kafscmd %d\n", current->pid); 121 printk(KERN_INFO "kAFS: Started kafscmd %d\n", current->pid);
122 122
123 daemonize("kafscmd"); 123 daemonize("kafscmd");
124 124
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 6682d6d7f294..5c61c24dab2a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -137,7 +137,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
137#endif 137#endif
138 138
139 /* determine how many magic numbers there should be in this page */ 139 /* determine how many magic numbers there should be in this page */
140 latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT); 140 latter = dir->i_size - page_offset(page);
141 if (latter >= PAGE_SIZE) 141 if (latter >= PAGE_SIZE)
142 qty = PAGE_SIZE; 142 qty = PAGE_SIZE;
143 else 143 else
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
index 1e691889c4c9..bfdcf19ba3f3 100644
--- a/fs/afs/volume.h
+++ b/fs/afs/volume.h
@@ -18,8 +18,6 @@
18#include "kafsasyncd.h" 18#include "kafsasyncd.h"
19#include "cache.h" 19#include "cache.h"
20 20
21#define __packed __attribute__((packed))
22
23typedef enum { 21typedef enum {
24 AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */ 22 AFS_VLUPD_SLEEP, /* sleeping waiting for update timer to fire */
25 AFS_VLUPD_PENDING, /* on pending queue */ 23 AFS_VLUPD_PENDING, /* on pending queue */
@@ -115,7 +113,7 @@ struct afs_volume
115 struct cachefs_cookie *cache; /* caching cookie */ 113 struct cachefs_cookie *cache; /* caching cookie */
116#endif 114#endif
117 afs_volid_t vid; /* volume ID */ 115 afs_volid_t vid; /* volume ID */
118 afs_voltype_t __packed type; /* type of volume */ 116 afs_voltype_t type; /* type of volume */
119 char type_force; /* force volume type (suppress R/O -> R/W) */ 117 char type_force; /* force volume type (suppress R/O -> R/W) */
120 unsigned short nservers; /* number of server slots filled */ 118 unsigned short nservers; /* number of server slots filled */
121 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ 119 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
diff --git a/fs/aio.c b/fs/aio.c
index 5a28b69ad223..aec2b1916d1b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,7 +29,6 @@
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/workqueue.h> 30#include <linux/workqueue.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/rcuref.h>
33 32
34#include <asm/kmap_types.h> 33#include <asm/kmap_types.h>
35#include <asm/uaccess.h> 34#include <asm/uaccess.h>
@@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
514 /* Must be done under the lock to serialise against cancellation. 513 /* Must be done under the lock to serialise against cancellation.
515 * Call this aio_fput as it duplicates fput via the fput_work. 514 * Call this aio_fput as it duplicates fput via the fput_work.
516 */ 515 */
517 if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { 516 if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
518 get_ioctx(ctx); 517 get_ioctx(ctx);
519 spin_lock(&fput_lock); 518 spin_lock(&fput_lock);
520 list_add(&req->ki_list, &fput_head); 519 list_add(&req->ki_list, &fput_head);
diff --git a/fs/attr.c b/fs/attr.c
index 67bcd9b14ea5..97de94670878 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -10,11 +10,11 @@
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/capability.h>
13#include <linux/fsnotify.h> 14#include <linux/fsnotify.h>
14#include <linux/fcntl.h> 15#include <linux/fcntl.h>
15#include <linux/quotaops.h> 16#include <linux/quotaops.h>
16#include <linux/security.h> 17#include <linux/security.h>
17#include <linux/time.h>
18 18
19/* Taken over from the old code... */ 19/* Taken over from the old code... */
20 20
@@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok);
67int inode_setattr(struct inode * inode, struct iattr * attr) 67int inode_setattr(struct inode * inode, struct iattr * attr)
68{ 68{
69 unsigned int ia_valid = attr->ia_valid; 69 unsigned int ia_valid = attr->ia_valid;
70 int error = 0; 70
71 71 if (ia_valid & ATTR_SIZE &&
72 if (ia_valid & ATTR_SIZE) { 72 attr->ia_size != i_size_read(inode)) {
73 if (attr->ia_size != i_size_read(inode)) { 73 int error = vmtruncate(inode, attr->ia_size);
74 error = vmtruncate(inode, attr->ia_size); 74 if (error)
75 if (error || (ia_valid == ATTR_SIZE)) 75 return error;
76 goto out;
77 } else {
78 /*
79 * We skipped the truncate but must still update
80 * timestamps
81 */
82 ia_valid |= ATTR_MTIME|ATTR_CTIME;
83 }
84 } 76 }
85 77
86 if (ia_valid & ATTR_UID) 78 if (ia_valid & ATTR_UID)
@@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
104 inode->i_mode = mode; 96 inode->i_mode = mode;
105 } 97 }
106 mark_inode_dirty(inode); 98 mark_inode_dirty(inode);
107out: 99
108 return error; 100 return 0;
109} 101}
110EXPORT_SYMBOL(inode_setattr); 102EXPORT_SYMBOL(inode_setattr);
111 103
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index a1ab1c0ed215..870e2cf33016 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -10,6 +10,7 @@
10 * 10 *
11 * ------------------------------------------------------------------------- */ 11 * ------------------------------------------------------------------------- */
12 12
13#include <linux/capability.h>
13#include <linux/errno.h> 14#include <linux/errno.h>
14#include <linux/stat.h> 15#include <linux/stat.h>
15#include <linux/param.h> 16#include <linux/param.h>
@@ -229,9 +230,9 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
229 dentry->d_flags |= DCACHE_AUTOFS_PENDING; 230 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
230 d_add(dentry, NULL); 231 d_add(dentry, NULL);
231 232
232 up(&dir->i_sem); 233 mutex_unlock(&dir->i_mutex);
233 autofs_revalidate(dentry, nd); 234 autofs_revalidate(dentry, nd);
234 down(&dir->i_sem); 235 mutex_lock(&dir->i_mutex);
235 236
236 /* 237 /*
237 * If we are still pending, check if we had to handle 238 * If we are still pending, check if we had to handle
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index fca83e28edcf..385bed09b0d8 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry)
209 struct dentry *child; 209 struct dentry *child;
210 int ret = 0; 210 int ret = 0;
211 211
212 list_for_each_entry(child, &dentry->d_subdirs, d_child) 212 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
213 if (simple_positive(child)) 213 if (simple_positive(child))
214 goto out; 214 goto out;
215 ret = 1; 215 ret = 1;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index feb6ac427d05..dc39589df165 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -105,7 +105,7 @@ repeat:
105 next = this_parent->d_subdirs.next; 105 next = this_parent->d_subdirs.next;
106resume: 106resume:
107 while (next != &this_parent->d_subdirs) { 107 while (next != &this_parent->d_subdirs) {
108 struct dentry *dentry = list_entry(next, struct dentry, d_child); 108 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
109 109
110 /* Negative dentry - give up */ 110 /* Negative dentry - give up */
111 if (!simple_positive(dentry)) { 111 if (!simple_positive(dentry)) {
@@ -138,7 +138,7 @@ resume:
138 } 138 }
139 139
140 if (this_parent != top) { 140 if (this_parent != top) {
141 next = this_parent->d_child.next; 141 next = this_parent->d_u.d_child.next;
142 this_parent = this_parent->d_parent; 142 this_parent = this_parent->d_parent;
143 goto resume; 143 goto resume;
144 } 144 }
@@ -163,7 +163,7 @@ repeat:
163 next = this_parent->d_subdirs.next; 163 next = this_parent->d_subdirs.next;
164resume: 164resume:
165 while (next != &this_parent->d_subdirs) { 165 while (next != &this_parent->d_subdirs) {
166 struct dentry *dentry = list_entry(next, struct dentry, d_child); 166 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
167 167
168 /* Negative dentry - give up */ 168 /* Negative dentry - give up */
169 if (!simple_positive(dentry)) { 169 if (!simple_positive(dentry)) {
@@ -199,7 +199,7 @@ cont:
199 } 199 }
200 200
201 if (this_parent != parent) { 201 if (this_parent != parent) {
202 next = this_parent->d_child.next; 202 next = this_parent->d_u.d_child.next;
203 this_parent = this_parent->d_parent; 203 this_parent = this_parent->d_parent;
204 goto resume; 204 goto resume;
205 } 205 }
@@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb,
238 /* On exit from the loop expire is set to a dgot dentry 238 /* On exit from the loop expire is set to a dgot dentry
239 * to expire or it's NULL */ 239 * to expire or it's NULL */
240 while ( next != &root->d_subdirs ) { 240 while ( next != &root->d_subdirs ) {
241 struct dentry *dentry = list_entry(next, struct dentry, d_child); 241 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
242 242
243 /* Negative dentry - give up */ 243 /* Negative dentry - give up */
244 if ( !simple_positive(dentry) ) { 244 if ( !simple_positive(dentry) ) {
@@ -302,7 +302,7 @@ next:
302 expired, (int)expired->d_name.len, expired->d_name.name); 302 expired, (int)expired->d_name.len, expired->d_name.name);
303 spin_lock(&dcache_lock); 303 spin_lock(&dcache_lock);
304 list_del(&expired->d_parent->d_subdirs); 304 list_del(&expired->d_parent->d_subdirs);
305 list_add(&expired->d_parent->d_subdirs, &expired->d_child); 305 list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
306 spin_unlock(&dcache_lock); 306 spin_unlock(&dcache_lock);
307 return expired; 307 return expired;
308 } 308 }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 818b37be5153..2d3082854a29 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -91,7 +91,7 @@ repeat:
91 next = this_parent->d_subdirs.next; 91 next = this_parent->d_subdirs.next;
92resume: 92resume:
93 while (next != &this_parent->d_subdirs) { 93 while (next != &this_parent->d_subdirs) {
94 struct dentry *dentry = list_entry(next, struct dentry, d_child); 94 struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
95 95
96 /* Negative dentry - don`t care */ 96 /* Negative dentry - don`t care */
97 if (!simple_positive(dentry)) { 97 if (!simple_positive(dentry)) {
@@ -117,7 +117,7 @@ resume:
117 if (this_parent != sbi->root) { 117 if (this_parent != sbi->root) {
118 struct dentry *dentry = this_parent; 118 struct dentry *dentry = this_parent;
119 119
120 next = this_parent->d_child.next; 120 next = this_parent->d_u.d_child.next;
121 this_parent = this_parent->d_parent; 121 this_parent = this_parent->d_parent;
122 spin_unlock(&dcache_lock); 122 spin_unlock(&dcache_lock);
123 DPRINTK("parent dentry %p %.*s", 123 DPRINTK("parent dentry %p %.*s",
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2a771ec66956..62d8d4acb8bb 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -12,6 +12,7 @@
12 * 12 *
13 * ------------------------------------------------------------------------- */ 13 * ------------------------------------------------------------------------- */
14 14
15#include <linux/capability.h>
15#include <linux/errno.h> 16#include <linux/errno.h>
16#include <linux/stat.h> 17#include <linux/stat.h>
17#include <linux/param.h> 18#include <linux/param.h>
@@ -86,7 +87,7 @@ static int autofs4_root_readdir(struct file *file, void *dirent,
86 87
87/* Update usage from here to top of tree, so that scan of 88/* Update usage from here to top of tree, so that scan of
88 top-level directories will give a useful result */ 89 top-level directories will give a useful result */
89static void autofs4_update_usage(struct dentry *dentry) 90static void autofs4_update_usage(struct vfsmount *mnt, struct dentry *dentry)
90{ 91{
91 struct dentry *top = dentry->d_sb->s_root; 92 struct dentry *top = dentry->d_sb->s_root;
92 93
@@ -95,7 +96,7 @@ static void autofs4_update_usage(struct dentry *dentry)
95 struct autofs_info *ino = autofs4_dentry_ino(dentry); 96 struct autofs_info *ino = autofs4_dentry_ino(dentry);
96 97
97 if (ino) { 98 if (ino) {
98 update_atime(dentry->d_inode); 99 touch_atime(mnt, dentry);
99 ino->last_used = jiffies; 100 ino->last_used = jiffies;
100 } 101 }
101 } 102 }
@@ -143,7 +144,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f
143 } 144 }
144 145
145 while(1) { 146 while(1) {
146 struct dentry *de = list_entry(list, struct dentry, d_child); 147 struct dentry *de = list_entry(list,
148 struct dentry, d_u.d_child);
147 149
148 if (!d_unhashed(de) && de->d_inode) { 150 if (!d_unhashed(de) && de->d_inode) {
149 spin_unlock(&dcache_lock); 151 spin_unlock(&dcache_lock);
@@ -193,6 +195,8 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
193 if (!empty) 195 if (!empty)
194 d_invalidate(dentry); 196 d_invalidate(dentry);
195 197
198 nd.dentry = dentry;
199 nd.mnt = mnt;
196 nd.flags = LOOKUP_DIRECTORY; 200 nd.flags = LOOKUP_DIRECTORY;
197 status = (dentry->d_op->d_revalidate)(dentry, &nd); 201 status = (dentry->d_op->d_revalidate)(dentry, &nd);
198 202
@@ -288,10 +292,10 @@ out:
288 return autofs4_dcache_readdir(file, dirent, filldir); 292 return autofs4_dcache_readdir(file, dirent, filldir);
289} 293}
290 294
291static int try_to_fill_dentry(struct dentry *dentry, 295static int try_to_fill_dentry(struct vfsmount *mnt, struct dentry *dentry, int flags)
292 struct super_block *sb,
293 struct autofs_sb_info *sbi, int flags)
294{ 296{
297 struct super_block *sb = mnt->mnt_sb;
298 struct autofs_sb_info *sbi = autofs4_sbi(sb);
295 struct autofs_info *de_info = autofs4_dentry_ino(dentry); 299 struct autofs_info *de_info = autofs4_dentry_ino(dentry);
296 int status = 0; 300 int status = 0;
297 301
@@ -366,7 +370,7 @@ static int try_to_fill_dentry(struct dentry *dentry,
366 /* We don't update the usages for the autofs daemon itself, this 370 /* We don't update the usages for the autofs daemon itself, this
367 is necessary for recursive autofs mounts */ 371 is necessary for recursive autofs mounts */
368 if (!autofs4_oz_mode(sbi)) 372 if (!autofs4_oz_mode(sbi))
369 autofs4_update_usage(dentry); 373 autofs4_update_usage(mnt, dentry);
370 374
371 spin_lock(&dentry->d_lock); 375 spin_lock(&dentry->d_lock);
372 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 376 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
@@ -391,7 +395,7 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
391 /* Pending dentry */ 395 /* Pending dentry */
392 if (autofs4_ispending(dentry)) { 396 if (autofs4_ispending(dentry)) {
393 if (!oz_mode) 397 if (!oz_mode)
394 status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags); 398 status = try_to_fill_dentry(nd->mnt, dentry, flags);
395 return status; 399 return status;
396 } 400 }
397 401
@@ -408,14 +412,14 @@ static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd)
408 dentry, dentry->d_name.len, dentry->d_name.name); 412 dentry, dentry->d_name.len, dentry->d_name.name);
409 spin_unlock(&dcache_lock); 413 spin_unlock(&dcache_lock);
410 if (!oz_mode) 414 if (!oz_mode)
411 status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags); 415 status = try_to_fill_dentry(nd->mnt, dentry, flags);
412 return status; 416 return status;
413 } 417 }
414 spin_unlock(&dcache_lock); 418 spin_unlock(&dcache_lock);
415 419
416 /* Update the usage list */ 420 /* Update the usage list */
417 if (!oz_mode) 421 if (!oz_mode)
418 autofs4_update_usage(dentry); 422 autofs4_update_usage(nd->mnt, dentry);
419 423
420 return 1; 424 return 1;
421} 425}
@@ -488,9 +492,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
488 d_add(dentry, NULL); 492 d_add(dentry, NULL);
489 493
490 if (dentry->d_op && dentry->d_op->d_revalidate) { 494 if (dentry->d_op && dentry->d_op->d_revalidate) {
491 up(&dir->i_sem); 495 mutex_unlock(&dir->i_mutex);
492 (dentry->d_op->d_revalidate)(dentry, nd); 496 (dentry->d_op->d_revalidate)(dentry, nd);
493 down(&dir->i_sem); 497 mutex_lock(&dir->i_mutex);
494 } 498 }
495 499
496 /* 500 /*
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 72011826f0cb..f312103434d4 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -33,8 +33,6 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
33static int load_aout_library(struct file*); 33static int load_aout_library(struct file*);
34static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); 34static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
35 35
36extern void dump_thread(struct pt_regs *, struct user *);
37
38static struct linux_binfmt aout_format = { 36static struct linux_binfmt aout_format = {
39 .module = THIS_MODULE, 37 .module = THIS_MODULE,
40 .load_binary = load_aout_binary, 38 .load_binary = load_aout_binary,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f36f2210204f..1b117a441298 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -58,7 +58,7 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
58 * If we don't support core dumping, then supply a NULL so we 58 * If we don't support core dumping, then supply a NULL so we
59 * don't even try. 59 * don't even try.
60 */ 60 */
61#ifdef USE_ELF_CORE_DUMP 61#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
62static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file); 62static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
63#else 63#else
64#define elf_core_dump NULL 64#define elf_core_dump NULL
@@ -288,11 +288,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type) 288 struct elf_phdr *eppnt, int prot, int type)
289{ 289{
290 unsigned long map_addr; 290 unsigned long map_addr;
291 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
291 292
292 down_write(&current->mm->mmap_sem); 293 down_write(&current->mm->mmap_sem);
293 map_addr = do_mmap(filep, ELF_PAGESTART(addr), 294 /* mmap() will return -EINVAL if given a zero size, but a
294 eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type, 295 * segment with zero filesize is perfectly valid */
295 eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); 296 if (eppnt->p_filesz + pageoffset)
297 map_addr = do_mmap(filep, ELF_PAGESTART(addr),
298 eppnt->p_filesz + pageoffset, prot, type,
299 eppnt->p_offset - pageoffset);
300 else
301 map_addr = ELF_PAGESTART(addr);
296 up_write(&current->mm->mmap_sem); 302 up_write(&current->mm->mmap_sem);
297 return(map_addr); 303 return(map_addr);
298} 304}
@@ -616,7 +622,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
616 goto out_free_file; 622 goto out_free_file;
617 623
618 retval = -ENOMEM; 624 retval = -ENOMEM;
619 elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, 625 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
620 GFP_KERNEL); 626 GFP_KERNEL);
621 if (!elf_interpreter) 627 if (!elf_interpreter)
622 goto out_free_file; 628 goto out_free_file;
@@ -1107,7 +1113,7 @@ out:
1107 * Note that some platforms still use traditional core dumps and not 1113 * Note that some platforms still use traditional core dumps and not
1108 * the ELF core dump. Each platform can select it as appropriate. 1114 * the ELF core dump. Each platform can select it as appropriate.
1109 */ 1115 */
1110#ifdef USE_ELF_CORE_DUMP 1116#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1111 1117
1112/* 1118/*
1113 * ELF core dumper 1119 * ELF core dumper
@@ -1212,7 +1218,7 @@ static int writenote(struct memelfnote *men, struct file *file)
1212 if (!dump_seek(file, (off))) \ 1218 if (!dump_seek(file, (off))) \
1213 goto end_coredump; 1219 goto end_coredump;
1214 1220
1215static inline void fill_elf_header(struct elfhdr *elf, int segs) 1221static void fill_elf_header(struct elfhdr *elf, int segs)
1216{ 1222{
1217 memcpy(elf->e_ident, ELFMAG, SELFMAG); 1223 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1218 elf->e_ident[EI_CLASS] = ELF_CLASS; 1224 elf->e_ident[EI_CLASS] = ELF_CLASS;
@@ -1237,7 +1243,7 @@ static inline void fill_elf_header(struct elfhdr *elf, int segs)
1237 return; 1243 return;
1238} 1244}
1239 1245
1240static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) 1246static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
1241{ 1247{
1242 phdr->p_type = PT_NOTE; 1248 phdr->p_type = PT_NOTE;
1243 phdr->p_offset = offset; 1249 phdr->p_offset = offset;
@@ -1628,17 +1634,17 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1628 ELF_CORE_WRITE_EXTRA_DATA; 1634 ELF_CORE_WRITE_EXTRA_DATA;
1629#endif 1635#endif
1630 1636
1631 if ((off_t) file->f_pos != offset) { 1637 if ((off_t)file->f_pos != offset) {
1632 /* Sanity check */ 1638 /* Sanity check */
1633 printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", 1639 printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1634 (off_t) file->f_pos, offset); 1640 (off_t)file->f_pos, offset);
1635 } 1641 }
1636 1642
1637end_coredump: 1643end_coredump:
1638 set_fs(fs); 1644 set_fs(fs);
1639 1645
1640cleanup: 1646cleanup:
1641 while(!list_empty(&thread_list)) { 1647 while (!list_empty(&thread_list)) {
1642 struct list_head *tmp = thread_list.next; 1648 struct list_head *tmp = thread_list.next;
1643 list_del(tmp); 1649 list_del(tmp);
1644 kfree(list_entry(tmp, struct elf_thread_status, list)); 1650 kfree(list_entry(tmp, struct elf_thread_status, list));
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index e0344f69c79d..5b3076e8ee90 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -187,7 +187,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
187 goto error; 187 goto error;
188 188
189 /* read the name of the interpreter into memory */ 189 /* read the name of the interpreter into memory */
190 interpreter_name = (char *) kmalloc(phdr->p_filesz, GFP_KERNEL); 190 interpreter_name = kmalloc(phdr->p_filesz, GFP_KERNEL);
191 if (!interpreter_name) 191 if (!interpreter_name)
192 goto error; 192 goto error;
193 193
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 9d6625829b99..108d56bbd0d0 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -77,8 +77,6 @@ static int load_flat_shared_library(int id, struct lib_info *p);
77static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); 77static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
78static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file); 78static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file);
79 79
80extern void dump_thread(struct pt_regs *, struct user *);
81
82static struct linux_binfmt flat_format = { 80static struct linux_binfmt flat_format = {
83 .module = THIS_MODULE, 81 .module = THIS_MODULE,
84 .load_binary = load_flat_binary, 82 .load_binary = load_flat_binary,
@@ -444,19 +442,22 @@ static int load_flat_file(struct linux_binprm * bprm,
444 flags = ntohl(hdr->flags); 442 flags = ntohl(hdr->flags);
445 rev = ntohl(hdr->rev); 443 rev = ntohl(hdr->rev);
446 444
447 if (flags & FLAT_FLAG_KTRACE) 445 if (strncmp(hdr->magic, "bFLT", 4)) {
448 printk("BINFMT_FLAT: Loading file: %s\n", bprm->filename);
449
450 if (strncmp(hdr->magic, "bFLT", 4) ||
451 (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION)) {
452 /* 446 /*
453 * because a lot of people do not manage to produce good 447 * because a lot of people do not manage to produce good
454 * flat binaries, we leave this printk to help them realise 448 * flat binaries, we leave this printk to help them realise
455 * the problem. We only print the error if its not a script file 449 * the problem. We only print the error if its not a script file
456 */ 450 */
457 if (strncmp(hdr->magic, "#!", 2)) 451 if (strncmp(hdr->magic, "#!", 2))
458 printk("BINFMT_FLAT: bad magic/rev (0x%x, need 0x%x)\n", 452 printk("BINFMT_FLAT: bad header magic\n");
459 rev, (int) FLAT_VERSION); 453 return -ENOEXEC;
454 }
455
456 if (flags & FLAT_FLAG_KTRACE)
457 printk("BINFMT_FLAT: Loading file: %s\n", bprm->filename);
458
459 if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
460 printk("BINFMT_FLAT: bad flat file version 0x%x (supported 0x%x and 0x%x)\n", rev, FLAT_VERSION, OLD_FLAT_VERSION);
460 return -ENOEXEC; 461 return -ENOEXEC;
461 } 462 }
462 463
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 2568eb41cb3a..6a7b730c206b 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -264,7 +264,7 @@ static int unquote(char *from)
264 return p - from; 264 return p - from;
265} 265}
266 266
267static inline char * check_special_flags (char * sfs, Node * e) 267static char * check_special_flags (char * sfs, Node * e)
268{ 268{
269 char * p = sfs; 269 char * p = sfs;
270 int cont = 1; 270 int cont = 1;
@@ -588,11 +588,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
588 case 2: set_bit(Enabled, &e->flags); 588 case 2: set_bit(Enabled, &e->flags);
589 break; 589 break;
590 case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root); 590 case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
591 down(&root->d_inode->i_sem); 591 mutex_lock(&root->d_inode->i_mutex);
592 592
593 kill_node(e); 593 kill_node(e);
594 594
595 up(&root->d_inode->i_sem); 595 mutex_unlock(&root->d_inode->i_mutex);
596 dput(root); 596 dput(root);
597 break; 597 break;
598 default: return res; 598 default: return res;
@@ -622,7 +622,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
622 return PTR_ERR(e); 622 return PTR_ERR(e);
623 623
624 root = dget(sb->s_root); 624 root = dget(sb->s_root);
625 down(&root->d_inode->i_sem); 625 mutex_lock(&root->d_inode->i_mutex);
626 dentry = lookup_one_len(e->name, root, strlen(e->name)); 626 dentry = lookup_one_len(e->name, root, strlen(e->name));
627 err = PTR_ERR(dentry); 627 err = PTR_ERR(dentry);
628 if (IS_ERR(dentry)) 628 if (IS_ERR(dentry))
@@ -658,7 +658,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
658out2: 658out2:
659 dput(dentry); 659 dput(dentry);
660out: 660out:
661 up(&root->d_inode->i_sem); 661 mutex_unlock(&root->d_inode->i_mutex);
662 dput(root); 662 dput(root);
663 663
664 if (err) { 664 if (err) {
@@ -703,12 +703,12 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
703 case 1: enabled = 0; break; 703 case 1: enabled = 0; break;
704 case 2: enabled = 1; break; 704 case 2: enabled = 1; break;
705 case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root); 705 case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root);
706 down(&root->d_inode->i_sem); 706 mutex_lock(&root->d_inode->i_mutex);
707 707
708 while (!list_empty(&entries)) 708 while (!list_empty(&entries))
709 kill_node(list_entry(entries.next, Node, list)); 709 kill_node(list_entry(entries.next, Node, list));
710 710
711 up(&root->d_inode->i_sem); 711 mutex_unlock(&root->d_inode->i_mutex);
712 dput(root); 712 dput(root);
713 default: return res; 713 default: return res;
714 } 714 }
diff --git a/fs/bio.c b/fs/bio.c
index dfe242a21eb4..bbc442b8c867 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -123,9 +123,10 @@ static void bio_fs_destructor(struct bio *bio)
123 bio_free(bio, fs_bio_set); 123 bio_free(bio, fs_bio_set);
124} 124}
125 125
126inline void bio_init(struct bio *bio) 126void bio_init(struct bio *bio)
127{ 127{
128 bio->bi_next = NULL; 128 bio->bi_next = NULL;
129 bio->bi_bdev = NULL;
129 bio->bi_flags = 1 << BIO_UPTODATE; 130 bio->bi_flags = 1 << BIO_UPTODATE;
130 bio->bi_rw = 0; 131 bio->bi_rw = 0;
131 bio->bi_vcnt = 0; 132 bio->bi_vcnt = 0;
@@ -252,7 +253,7 @@ inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
252 * the actual data it points to. Reference count of returned 253 * the actual data it points to. Reference count of returned
253 * bio will be one. 254 * bio will be one.
254 */ 255 */
255inline void __bio_clone(struct bio *bio, struct bio *bio_src) 256void __bio_clone(struct bio *bio, struct bio *bio_src)
256{ 257{
257 request_queue_t *q = bdev_get_queue(bio_src->bi_bdev); 258 request_queue_t *q = bdev_get_queue(bio_src->bi_bdev);
258 259
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e0df94c37b7e..6e50346fb1ee 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -202,7 +202,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
202 loff_t size; 202 loff_t size;
203 loff_t retval; 203 loff_t retval;
204 204
205 down(&bd_inode->i_sem); 205 mutex_lock(&bd_inode->i_mutex);
206 size = i_size_read(bd_inode); 206 size = i_size_read(bd_inode);
207 207
208 switch (origin) { 208 switch (origin) {
@@ -219,7 +219,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
219 } 219 }
220 retval = offset; 220 retval = offset;
221 } 221 }
222 up(&bd_inode->i_sem); 222 mutex_unlock(&bd_inode->i_mutex);
223 return retval; 223 return retval;
224} 224}
225 225
diff --git a/fs/buffer.c b/fs/buffer.c
index 5287be18633b..3dc712f29d2d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -26,6 +26,7 @@
26#include <linux/percpu.h> 26#include <linux/percpu.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/smp_lock.h> 28#include <linux/smp_lock.h>
29#include <linux/capability.h>
29#include <linux/blkdev.h> 30#include <linux/blkdev.h>
30#include <linux/file.h> 31#include <linux/file.h>
31#include <linux/quotaops.h> 32#include <linux/quotaops.h>
@@ -153,14 +154,8 @@ int sync_blockdev(struct block_device *bdev)
153{ 154{
154 int ret = 0; 155 int ret = 0;
155 156
156 if (bdev) { 157 if (bdev)
157 int err; 158 ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
158
159 ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
160 err = filemap_fdatawait(bdev->bd_inode->i_mapping);
161 if (!ret)
162 ret = err;
163 }
164 return ret; 159 return ret;
165} 160}
166EXPORT_SYMBOL(sync_blockdev); 161EXPORT_SYMBOL(sync_blockdev);
@@ -358,11 +353,11 @@ static long do_fsync(unsigned int fd, int datasync)
358 * We need to protect against concurrent writers, 353 * We need to protect against concurrent writers,
359 * which could cause livelocks in fsync_buffers_list 354 * which could cause livelocks in fsync_buffers_list
360 */ 355 */
361 down(&mapping->host->i_sem); 356 mutex_lock(&mapping->host->i_mutex);
362 err = file->f_op->fsync(file, file->f_dentry, datasync); 357 err = file->f_op->fsync(file, file->f_dentry, datasync);
363 if (!ret) 358 if (!ret)
364 ret = err; 359 ret = err;
365 up(&mapping->host->i_sem); 360 mutex_unlock(&mapping->host->i_mutex);
366 err = filemap_fdatawait(mapping); 361 err = filemap_fdatawait(mapping);
367 if (!ret) 362 if (!ret)
368 ret = err; 363 ret = err;
@@ -1032,7 +1027,7 @@ try_again:
1032 /* Link the buffer to its page */ 1027 /* Link the buffer to its page */
1033 set_bh_page(bh, page, offset); 1028 set_bh_page(bh, page, offset);
1034 1029
1035 bh->b_end_io = NULL; 1030 init_buffer(bh, NULL, NULL);
1036 } 1031 }
1037 return head; 1032 return head;
1038/* 1033/*
@@ -1170,7 +1165,7 @@ failed:
1170 * some of those buffers may be aliases of filesystem data. 1165 * some of those buffers may be aliases of filesystem data.
1171 * grow_dev_page() will go BUG() if this happens. 1166 * grow_dev_page() will go BUG() if this happens.
1172 */ 1167 */
1173static inline int 1168static int
1174grow_buffers(struct block_device *bdev, sector_t block, int size) 1169grow_buffers(struct block_device *bdev, sector_t block, int size)
1175{ 1170{
1176 struct page *page; 1171 struct page *page;
@@ -1396,7 +1391,7 @@ static void bh_lru_install(struct buffer_head *bh)
1396/* 1391/*
1397 * Look up the bh in this cpu's LRU. If it's there, move it to the head. 1392 * Look up the bh in this cpu's LRU. If it's there, move it to the head.
1398 */ 1393 */
1399static inline struct buffer_head * 1394static struct buffer_head *
1400lookup_bh_lru(struct block_device *bdev, sector_t block, int size) 1395lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
1401{ 1396{
1402 struct buffer_head *ret = NULL; 1397 struct buffer_head *ret = NULL;
@@ -1546,7 +1541,7 @@ EXPORT_SYMBOL(set_bh_page);
1546/* 1541/*
1547 * Called when truncating a buffer on a page completely. 1542 * Called when truncating a buffer on a page completely.
1548 */ 1543 */
1549static inline void discard_buffer(struct buffer_head * bh) 1544static void discard_buffer(struct buffer_head * bh)
1550{ 1545{
1551 lock_buffer(bh); 1546 lock_buffer(bh);
1552 clear_buffer_dirty(bh); 1547 clear_buffer_dirty(bh);
@@ -1768,7 +1763,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1768 * handle that here by just cleaning them. 1763 * handle that here by just cleaning them.
1769 */ 1764 */
1770 1765
1771 block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 1766 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1772 head = page_buffers(page); 1767 head = page_buffers(page);
1773 bh = head; 1768 bh = head;
1774 1769
@@ -2160,11 +2155,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
2160 * truncates. Uses prepare/commit_write to allow the filesystem to 2155 * truncates. Uses prepare/commit_write to allow the filesystem to
2161 * deal with the hole. 2156 * deal with the hole.
2162 */ 2157 */
2163int generic_cont_expand(struct inode *inode, loff_t size) 2158static int __generic_cont_expand(struct inode *inode, loff_t size,
2159 pgoff_t index, unsigned int offset)
2164{ 2160{
2165 struct address_space *mapping = inode->i_mapping; 2161 struct address_space *mapping = inode->i_mapping;
2166 struct page *page; 2162 struct page *page;
2167 unsigned long index, offset, limit; 2163 unsigned long limit;
2168 int err; 2164 int err;
2169 2165
2170 err = -EFBIG; 2166 err = -EFBIG;
@@ -2176,24 +2172,24 @@ int generic_cont_expand(struct inode *inode, loff_t size)
2176 if (size > inode->i_sb->s_maxbytes) 2172 if (size > inode->i_sb->s_maxbytes)
2177 goto out; 2173 goto out;
2178 2174
2179 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
2180
2181 /* ugh. in prepare/commit_write, if from==to==start of block, we
2182 ** skip the prepare. make sure we never send an offset for the start
2183 ** of a block
2184 */
2185 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
2186 offset++;
2187 }
2188 index = size >> PAGE_CACHE_SHIFT;
2189 err = -ENOMEM; 2175 err = -ENOMEM;
2190 page = grab_cache_page(mapping, index); 2176 page = grab_cache_page(mapping, index);
2191 if (!page) 2177 if (!page)
2192 goto out; 2178 goto out;
2193 err = mapping->a_ops->prepare_write(NULL, page, offset, offset); 2179 err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
2194 if (!err) { 2180 if (err) {
2195 err = mapping->a_ops->commit_write(NULL, page, offset, offset); 2181 /*
2182 * ->prepare_write() may have instantiated a few blocks
2183 * outside i_size. Trim these off again.
2184 */
2185 unlock_page(page);
2186 page_cache_release(page);
2187 vmtruncate(inode, inode->i_size);
2188 goto out;
2196 } 2189 }
2190
2191 err = mapping->a_ops->commit_write(NULL, page, offset, offset);
2192
2197 unlock_page(page); 2193 unlock_page(page);
2198 page_cache_release(page); 2194 page_cache_release(page);
2199 if (err > 0) 2195 if (err > 0)
@@ -2202,6 +2198,36 @@ out:
2202 return err; 2198 return err;
2203} 2199}
2204 2200
2201int generic_cont_expand(struct inode *inode, loff_t size)
2202{
2203 pgoff_t index;
2204 unsigned int offset;
2205
2206 offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
2207
2208 /* ugh. in prepare/commit_write, if from==to==start of block, we
2209 ** skip the prepare. make sure we never send an offset for the start
2210 ** of a block
2211 */
2212 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
2213 /* caller must handle this extra byte. */
2214 offset++;
2215 }
2216 index = size >> PAGE_CACHE_SHIFT;
2217
2218 return __generic_cont_expand(inode, size, index, offset);
2219}
2220
2221int generic_cont_expand_simple(struct inode *inode, loff_t size)
2222{
2223 loff_t pos = size - 1;
2224 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2225 unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
2226
2227 /* prepare/commit_write can handle even if from==to==start of block. */
2228 return __generic_cont_expand(inode, size, index, offset);
2229}
2230
2205/* 2231/*
2206 * For moronic filesystems that do not allow holes in file. 2232 * For moronic filesystems that do not allow holes in file.
2207 * We may have to extend the file. 2233 * We may have to extend the file.
@@ -2313,7 +2339,7 @@ int generic_commit_write(struct file *file, struct page *page,
2313 __block_commit_write(inode,page,from,to); 2339 __block_commit_write(inode,page,from,to);
2314 /* 2340 /*
2315 * No need to use i_size_read() here, the i_size 2341 * No need to use i_size_read() here, the i_size
2316 * cannot change under us because we hold i_sem. 2342 * cannot change under us because we hold i_mutex.
2317 */ 2343 */
2318 if (pos > inode->i_size) { 2344 if (pos > inode->i_size) {
2319 i_size_write(inode, pos); 2345 i_size_write(inode, pos);
@@ -2610,7 +2636,7 @@ int block_truncate_page(struct address_space *mapping,
2610 pgoff_t index = from >> PAGE_CACHE_SHIFT; 2636 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2611 unsigned offset = from & (PAGE_CACHE_SIZE-1); 2637 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2612 unsigned blocksize; 2638 unsigned blocksize;
2613 pgoff_t iblock; 2639 sector_t iblock;
2614 unsigned length, pos; 2640 unsigned length, pos;
2615 struct inode *inode = mapping->host; 2641 struct inode *inode = mapping->host;
2616 struct page *page; 2642 struct page *page;
@@ -2626,7 +2652,7 @@ int block_truncate_page(struct address_space *mapping,
2626 return 0; 2652 return 0;
2627 2653
2628 length = blocksize - length; 2654 length = blocksize - length;
2629 iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 2655 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2630 2656
2631 page = grab_cache_page(mapping, index); 2657 page = grab_cache_page(mapping, index);
2632 err = -ENOMEM; 2658 err = -ENOMEM;
@@ -3145,6 +3171,7 @@ EXPORT_SYMBOL(fsync_bdev);
3145EXPORT_SYMBOL(generic_block_bmap); 3171EXPORT_SYMBOL(generic_block_bmap);
3146EXPORT_SYMBOL(generic_commit_write); 3172EXPORT_SYMBOL(generic_commit_write);
3147EXPORT_SYMBOL(generic_cont_expand); 3173EXPORT_SYMBOL(generic_cont_expand);
3174EXPORT_SYMBOL(generic_cont_expand_simple);
3148EXPORT_SYMBOL(init_buffer); 3175EXPORT_SYMBOL(init_buffer);
3149EXPORT_SYMBOL(invalidate_bdev); 3176EXPORT_SYMBOL(invalidate_bdev);
3150EXPORT_SYMBOL(ll_rw_block); 3177EXPORT_SYMBOL(ll_rw_block);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3b1b1eefdbb0..21195c481637 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -35,7 +35,7 @@ static struct char_device_struct {
35 unsigned int major; 35 unsigned int major;
36 unsigned int baseminor; 36 unsigned int baseminor;
37 int minorct; 37 int minorct;
38 const char *name; 38 char name[64];
39 struct file_operations *fops; 39 struct file_operations *fops;
40 struct cdev *cdev; /* will die */ 40 struct cdev *cdev; /* will die */
41} *chrdevs[MAX_PROBE_HASH]; 41} *chrdevs[MAX_PROBE_HASH];
@@ -46,34 +46,84 @@ static inline int major_to_index(int major)
46 return major % MAX_PROBE_HASH; 46 return major % MAX_PROBE_HASH;
47} 47}
48 48
49/* get char device names in somewhat random order */ 49struct chrdev_info {
50int get_chrdev_list(char *page) 50 int index;
51{
52 struct char_device_struct *cd; 51 struct char_device_struct *cd;
53 int i, len; 52};
54 53
55 len = sprintf(page, "Character devices:\n"); 54void *get_next_chrdev(void *dev)
55{
56 struct chrdev_info *info;
56 57
58 if (dev == NULL) {
59 info = kmalloc(sizeof(*info), GFP_KERNEL);
60 if (!info)
61 goto out;
62 info->index=0;
63 info->cd = chrdevs[info->index];
64 if (info->cd)
65 goto out;
66 } else {
67 info = dev;
68 }
69
70 while (info->index < ARRAY_SIZE(chrdevs)) {
71 if (info->cd)
72 info->cd = info->cd->next;
73 if (info->cd)
74 goto out;
75 /*
76 * No devices on this chain, move to the next
77 */
78 info->index++;
79 info->cd = (info->index < ARRAY_SIZE(chrdevs)) ?
80 chrdevs[info->index] : NULL;
81 if (info->cd)
82 goto out;
83 }
84
85out:
86 return info;
87}
88
89void *acquire_chrdev_list(void)
90{
57 down(&chrdevs_lock); 91 down(&chrdevs_lock);
92 return get_next_chrdev(NULL);
93}
94
95void release_chrdev_list(void *dev)
96{
97 up(&chrdevs_lock);
98 kfree(dev);
99}
100
101
102int count_chrdev_list(void)
103{
104 struct char_device_struct *cd;
105 int i, count;
106
107 count = 0;
108
58 for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { 109 for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
59 for (cd = chrdevs[i]; cd; cd = cd->next) { 110 for (cd = chrdevs[i]; cd; cd = cd->next)
60 /* 111 count++;
61 * if the current name, plus the 5 extra characters
62 * in the device line for this entry
63 * would run us off the page, we're done
64 */
65 if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
66 goto page_full;
67
68
69 len += sprintf(page+len, "%3d %s\n",
70 cd->major, cd->name);
71 }
72 } 112 }
73page_full:
74 up(&chrdevs_lock);
75 113
76 return len; 114 return count;
115}
116
117int get_chrdev_info(void *dev, int *major, char **name)
118{
119 struct chrdev_info *info = dev;
120
121 if (info->cd == NULL)
122 return 1;
123
124 *major = info->cd->major;
125 *name = info->cd->name;
126 return 0;
77} 127}
78 128
79/* 129/*
@@ -121,7 +171,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
121 cd->major = major; 171 cd->major = major;
122 cd->baseminor = baseminor; 172 cd->baseminor = baseminor;
123 cd->minorct = minorct; 173 cd->minorct = minorct;
124 cd->name = name; 174 strncpy(cd->name,name, 64);
125 175
126 i = major_to_index(major); 176 i = major_to_index(major);
127 177
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 943ef9b82244..d335015473a5 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
1Version 1.40
2------------
3Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance
4of readpages by eliminating one extra memcpy. Allow update of file size
5from remote server even if file is open for write as long as mount is
6directio. Recognize share mode security and send NTLM encrypted password
7on tree connect if share mode negotiated.
8
1Version 1.39 9Version 1.39
2------------ 10------------
3Defer close of a file handle slightly if pending writes depend on that handle 11Defer close of a file handle slightly if pending writes depend on that handle
@@ -7,6 +15,8 @@ Fix SFU style symlinks and mknod needed for servers which do not support the
7CIFS Unix Extensions. Fix setfacl/getfacl on bigendian. Timeout negative 15CIFS Unix Extensions. Fix setfacl/getfacl on bigendian. Timeout negative
8dentries so files that the client sees as deleted but that later get created 16dentries so files that the client sees as deleted but that later get created
9on the server will be recognized. Add client side permission check on setattr. 17on the server will be recognized. Add client side permission check on setattr.
18Timeout stuck requests better (where server has never responded or sent corrupt
19responses)
10 20
11Version 1.38 21Version 1.38
12------------ 22------------
diff --git a/fs/cifs/README b/fs/cifs/README
index e5d09a2fc7a5..b0070d1b149d 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -436,7 +436,17 @@ A partial list of the supported mount options follows:
436 SFU does). In the future the bottom 9 bits of the mode 436 SFU does). In the future the bottom 9 bits of the mode
437 mode also will be emulated using queries of the security 437 mode also will be emulated using queries of the security
438 descriptor (ACL). 438 descriptor (ACL).
439 439sec Security mode. Allowed values are:
440 none attempt to connection as a null user (no name)
441 krb5 Use Kerberos version 5 authentication
442 krb5i Use Kerberos authentication and packet signing
443 ntlm Use NTLM password hashing (default)
444 ntlmi Use NTLM password hashing with signing (can also be
445 the default if /proc/fs/cifs/PacketSigningEnabled is on
446 or if the server requires signing)
447 ntlmv2 Use NTLMv2 password hashing
448 ntlmv2i Use NTLMv2 password hashing with packet signing
449
440The mount.cifs mount helper also accepts a few mount options before -o 450The mount.cifs mount helper also accepts a few mount options before -o
441including: 451including:
442 452
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 22a444a3fe4c..f4124a32bef8 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -219,6 +219,10 @@ cifs_stats_write(struct file *file, const char __user *buffer,
219 219
220 if (c == '1' || c == 'y' || c == 'Y' || c == '0') { 220 if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
221 read_lock(&GlobalSMBSeslock); 221 read_lock(&GlobalSMBSeslock);
222#ifdef CONFIG_CIFS_STATS2
223 atomic_set(&totBufAllocCount, 0);
224 atomic_set(&totSmBufAllocCount, 0);
225#endif /* CONFIG_CIFS_STATS2 */
222 list_for_each(tmp, &GlobalTreeConnectionList) { 226 list_for_each(tmp, &GlobalTreeConnectionList) {
223 tcon = list_entry(tmp, struct cifsTconInfo, 227 tcon = list_entry(tmp, struct cifsTconInfo,
224 cifsConnectionList); 228 cifsConnectionList);
@@ -276,6 +280,14 @@ cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
276 smBufAllocCount.counter,cifs_min_small); 280 smBufAllocCount.counter,cifs_min_small);
277 length += item_length; 281 length += item_length;
278 buf += item_length; 282 buf += item_length;
283#ifdef CONFIG_CIFS_STATS2
284 item_length = sprintf(buf, "Total Large %d Small %d Allocations\n",
285 atomic_read(&totBufAllocCount),
286 atomic_read(&totSmBufAllocCount));
287 length += item_length;
288 buf += item_length;
289#endif /* CONFIG_CIFS_STATS2 */
290
279 item_length = 291 item_length =
280 sprintf(buf,"Operations (MIDs): %d\n", 292 sprintf(buf,"Operations (MIDs): %d\n",
281 midCount.counter); 293 midCount.counter);
@@ -389,8 +401,8 @@ static read_proc_t ntlmv2_enabled_read;
389static write_proc_t ntlmv2_enabled_write; 401static write_proc_t ntlmv2_enabled_write;
390static read_proc_t packet_signing_enabled_read; 402static read_proc_t packet_signing_enabled_read;
391static write_proc_t packet_signing_enabled_write; 403static write_proc_t packet_signing_enabled_write;
392static read_proc_t quotaEnabled_read; 404static read_proc_t experimEnabled_read;
393static write_proc_t quotaEnabled_write; 405static write_proc_t experimEnabled_write;
394static read_proc_t linuxExtensionsEnabled_read; 406static read_proc_t linuxExtensionsEnabled_read;
395static write_proc_t linuxExtensionsEnabled_write; 407static write_proc_t linuxExtensionsEnabled_write;
396 408
@@ -430,9 +442,9 @@ cifs_proc_init(void)
430 pde->write_proc = oplockEnabled_write; 442 pde->write_proc = oplockEnabled_write;
431 443
432 pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs, 444 pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs,
433 quotaEnabled_read, NULL); 445 experimEnabled_read, NULL);
434 if (pde) 446 if (pde)
435 pde->write_proc = quotaEnabled_write; 447 pde->write_proc = experimEnabled_write;
436 448
437 pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs, 449 pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs,
438 linuxExtensionsEnabled_read, NULL); 450 linuxExtensionsEnabled_read, NULL);
@@ -574,14 +586,13 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
574} 586}
575 587
576static int 588static int
577quotaEnabled_read(char *page, char **start, off_t off, 589experimEnabled_read(char *page, char **start, off_t off,
578 int count, int *eof, void *data) 590 int count, int *eof, void *data)
579{ 591{
580 int len; 592 int len;
581 593
582 len = sprintf(page, "%d\n", experimEnabled); 594 len = sprintf(page, "%d\n", experimEnabled);
583/* could also check if quotas are enabled in kernel 595
584 as a whole first */
585 len -= off; 596 len -= off;
586 *start = page + off; 597 *start = page + off;
587 598
@@ -596,21 +607,23 @@ quotaEnabled_read(char *page, char **start, off_t off,
596 return len; 607 return len;
597} 608}
598static int 609static int
599quotaEnabled_write(struct file *file, const char __user *buffer, 610experimEnabled_write(struct file *file, const char __user *buffer,
600 unsigned long count, void *data) 611 unsigned long count, void *data)
601{ 612{
602 char c; 613 char c;
603 int rc; 614 int rc;
604 615
605 rc = get_user(c, buffer); 616 rc = get_user(c, buffer);
606 if (rc) 617 if (rc)
607 return rc; 618 return rc;
608 if (c == '0' || c == 'n' || c == 'N') 619 if (c == '0' || c == 'n' || c == 'N')
609 experimEnabled = 0; 620 experimEnabled = 0;
610 else if (c == '1' || c == 'y' || c == 'Y') 621 else if (c == '1' || c == 'y' || c == 'Y')
611 experimEnabled = 1; 622 experimEnabled = 1;
623 else if (c == '2')
624 experimEnabled = 2;
612 625
613 return count; 626 return count;
614} 627}
615 628
616static int 629static int
@@ -620,8 +633,6 @@ linuxExtensionsEnabled_read(char *page, char **start, off_t off,
620 int len; 633 int len;
621 634
622 len = sprintf(page, "%d\n", linuxExtEnabled); 635 len = sprintf(page, "%d\n", linuxExtEnabled);
623/* could also check if quotas are enabled in kernel
624 as a whole first */
625 len -= off; 636 len -= off;
626 *start = page + off; 637 *start = page + off;
627 638
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index f799f6f0e729..ad58eb0c4d6d 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -24,9 +24,10 @@
24#define CIFS_MOUNT_DIRECT_IO 8 /* do not write nor read through page cache */ 24#define CIFS_MOUNT_DIRECT_IO 8 /* do not write nor read through page cache */
25#define CIFS_MOUNT_NO_XATTR 0x10 /* if set - disable xattr support */ 25#define CIFS_MOUNT_NO_XATTR 0x10 /* if set - disable xattr support */
26#define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */ 26#define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */
27#define CIFS_MOUNT_POSIX_PATHS 0x40 /* Negotiate posix pathnames if possible. */ 27#define CIFS_MOUNT_POSIX_PATHS 0x40 /* Negotiate posix pathnames if possible. */
28#define CIFS_MOUNT_UNX_EMUL 0x80 /* Network compat with SFUnix emulation */ 28#define CIFS_MOUNT_UNX_EMUL 0x80 /* Network compat with SFUnix emulation */
29#define CIFS_MOUNT_NO_BRL 0x100 /* No sending byte range locks to srv */ 29#define CIFS_MOUNT_NO_BRL 0x100 /* No sending byte range locks to srv */
30#define CIFS_MOUNT_CIFS_ACL 0x200 /* send ACL requests to non-POSIX srv */
30 31
31struct cifs_sb_info { 32struct cifs_sb_info {
32 struct cifsTconInfo *tcon; /* primary mount */ 33 struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h
index decd138f14d4..da2ad5b451ac 100644
--- a/fs/cifs/cifs_uniupr.h
+++ b/fs/cifs/cifs_uniupr.h
@@ -242,7 +242,7 @@ static signed char UniCaseRangeLff20[27] = {
242/* 242/*
243 * Lower Case Range 243 * Lower Case Range
244 */ 244 */
245const static struct UniCaseRange CifsUniLowerRange[] = { 245static const struct UniCaseRange CifsUniLowerRange[] = {
246 0x0380, 0x03ab, UniCaseRangeL0380, 246 0x0380, 0x03ab, UniCaseRangeL0380,
247 0x0400, 0x042f, UniCaseRangeL0400, 247 0x0400, 0x042f, UniCaseRangeL0400,
248 0x0490, 0x04cb, UniCaseRangeL0490, 248 0x0490, 0x04cb, UniCaseRangeL0490,
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
new file mode 100644
index 000000000000..d0776ac2b804
--- /dev/null
+++ b/fs/cifs/cifsacl.h
@@ -0,0 +1,38 @@
1/*
2 * fs/cifs/cifsacl.h
3 *
4 * Copyright (c) International Business Machines Corp., 2005
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#ifndef _CIFSACL_H
23#define _CIFSACL_H
24
25struct cifs_sid {
26 __u8 revision; /* revision level */
27 __u8 num_subauths;
28 __u8 authority[6];
29 __u32 sub_auth[4];
30 /* next sub_auth if any ... */
31} __attribute__((packed));
32
33/* everyone */
34extern const struct cifs_sid sid_everyone;
35/* group users */
36extern const struct cifs_sid sid_user;
37
38#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index fe2bb7c4c912..a2c24858d40f 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifsencrypt.c 2 * fs/cifs/cifsencrypt.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2003 4 * Copyright (C) International Business Machines Corp., 2005
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -82,6 +82,59 @@ int cifs_sign_smb(struct smb_hdr * cifs_pdu, struct TCP_Server_Info * server,
82 return rc; 82 return rc;
83} 83}
84 84
85static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
86 const char * key, char * signature)
87{
88 struct MD5Context context;
89
90 if((iov == NULL) || (signature == NULL))
91 return -EINVAL;
92
93 MD5Init(&context);
94 MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
95
96/* MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); */ /* BB FIXME BB */
97
98 MD5Final(signature,&context);
99
100 return -EOPNOTSUPP;
101/* return 0; */
102}
103
104
105int cifs_sign_smb2(struct kvec * iov, int n_vec, struct TCP_Server_Info *server,
106 __u32 * pexpected_response_sequence_number)
107{
108 int rc = 0;
109 char smb_signature[20];
110 struct smb_hdr * cifs_pdu = iov[0].iov_base;
111
112 if((cifs_pdu == NULL) || (server == NULL))
113 return -EINVAL;
114
115 if((cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) == 0)
116 return rc;
117
118 spin_lock(&GlobalMid_Lock);
119 cifs_pdu->Signature.Sequence.SequenceNumber =
120 cpu_to_le32(server->sequence_number);
121 cifs_pdu->Signature.Sequence.Reserved = 0;
122
123 *pexpected_response_sequence_number = server->sequence_number++;
124 server->sequence_number++;
125 spin_unlock(&GlobalMid_Lock);
126
127 rc = cifs_calc_signature2(iov, n_vec, server->mac_signing_key,
128 smb_signature);
129 if(rc)
130 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
131 else
132 memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8);
133
134 return rc;
135
136}
137
85int cifs_verify_signature(struct smb_hdr * cifs_pdu, const char * mac_key, 138int cifs_verify_signature(struct smb_hdr * cifs_pdu, const char * mac_key,
86 __u32 expected_sequence_number) 139 __u32 expected_sequence_number)
87{ 140{
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2a13a2bac8f1..79eeccd0437f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -513,6 +513,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf,
513 return written; 513 return written;
514} 514}
515 515
516static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
517{
518 /* origin == SEEK_END => we must revalidate the cached file length */
519 if (origin == 2) {
520 int retval = cifs_revalidate(file->f_dentry);
521 if (retval < 0)
522 return (loff_t)retval;
523 }
524 return remote_llseek(file, offset, origin);
525}
526
516static struct file_system_type cifs_fs_type = { 527static struct file_system_type cifs_fs_type = {
517 .owner = THIS_MODULE, 528 .owner = THIS_MODULE,
518 .name = "cifs", 529 .name = "cifs",
@@ -586,6 +597,7 @@ struct file_operations cifs_file_ops = {
586 .flush = cifs_flush, 597 .flush = cifs_flush,
587 .mmap = cifs_file_mmap, 598 .mmap = cifs_file_mmap,
588 .sendfile = generic_file_sendfile, 599 .sendfile = generic_file_sendfile,
600 .llseek = cifs_llseek,
589#ifdef CONFIG_CIFS_POSIX 601#ifdef CONFIG_CIFS_POSIX
590 .ioctl = cifs_ioctl, 602 .ioctl = cifs_ioctl,
591#endif /* CONFIG_CIFS_POSIX */ 603#endif /* CONFIG_CIFS_POSIX */
@@ -609,7 +621,7 @@ struct file_operations cifs_file_direct_ops = {
609#ifdef CONFIG_CIFS_POSIX 621#ifdef CONFIG_CIFS_POSIX
610 .ioctl = cifs_ioctl, 622 .ioctl = cifs_ioctl,
611#endif /* CONFIG_CIFS_POSIX */ 623#endif /* CONFIG_CIFS_POSIX */
612 624 .llseek = cifs_llseek,
613#ifdef CONFIG_CIFS_EXPERIMENTAL 625#ifdef CONFIG_CIFS_EXPERIMENTAL
614 .dir_notify = cifs_dir_notify, 626 .dir_notify = cifs_dir_notify,
615#endif /* CONFIG_CIFS_EXPERIMENTAL */ 627#endif /* CONFIG_CIFS_EXPERIMENTAL */
@@ -627,6 +639,7 @@ struct file_operations cifs_file_nobrl_ops = {
627 .flush = cifs_flush, 639 .flush = cifs_flush,
628 .mmap = cifs_file_mmap, 640 .mmap = cifs_file_mmap,
629 .sendfile = generic_file_sendfile, 641 .sendfile = generic_file_sendfile,
642 .llseek = cifs_llseek,
630#ifdef CONFIG_CIFS_POSIX 643#ifdef CONFIG_CIFS_POSIX
631 .ioctl = cifs_ioctl, 644 .ioctl = cifs_ioctl,
632#endif /* CONFIG_CIFS_POSIX */ 645#endif /* CONFIG_CIFS_POSIX */
@@ -649,7 +662,7 @@ struct file_operations cifs_file_direct_nobrl_ops = {
649#ifdef CONFIG_CIFS_POSIX 662#ifdef CONFIG_CIFS_POSIX
650 .ioctl = cifs_ioctl, 663 .ioctl = cifs_ioctl,
651#endif /* CONFIG_CIFS_POSIX */ 664#endif /* CONFIG_CIFS_POSIX */
652 665 .llseek = cifs_llseek,
653#ifdef CONFIG_CIFS_EXPERIMENTAL 666#ifdef CONFIG_CIFS_EXPERIMENTAL
654 .dir_notify = cifs_dir_notify, 667 .dir_notify = cifs_dir_notify,
655#endif /* CONFIG_CIFS_EXPERIMENTAL */ 668#endif /* CONFIG_CIFS_EXPERIMENTAL */
@@ -733,7 +746,7 @@ cifs_init_request_bufs(void)
733 kmem_cache_destroy(cifs_req_cachep); 746 kmem_cache_destroy(cifs_req_cachep);
734 return -ENOMEM; 747 return -ENOMEM;
735 } 748 }
736 /* 256 (MAX_CIFS_HDR_SIZE bytes is enough for most SMB responses and 749 /* MAX_CIFS_SMALL_BUFFER_SIZE bytes is enough for most SMB responses and
737 almost all handle based requests (but not write response, nor is it 750 almost all handle based requests (but not write response, nor is it
738 sufficient for path based requests). A smaller size would have 751 sufficient for path based requests). A smaller size would have
739 been more efficient (compacting multiple slab items on one 4k page) 752 been more efficient (compacting multiple slab items on one 4k page)
@@ -742,7 +755,8 @@ cifs_init_request_bufs(void)
742 efficient to alloc 1 per page off the slab compared to 17K (5page) 755 efficient to alloc 1 per page off the slab compared to 17K (5page)
743 alloc of large cifs buffers even when page debugging is on */ 756 alloc of large cifs buffers even when page debugging is on */
744 cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq", 757 cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq",
745 MAX_CIFS_HDR_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL, NULL); 758 MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN,
759 NULL, NULL);
746 if (cifs_sm_req_cachep == NULL) { 760 if (cifs_sm_req_cachep == NULL) {
747 mempool_destroy(cifs_req_poolp); 761 mempool_destroy(cifs_req_poolp);
748 kmem_cache_destroy(cifs_req_cachep); 762 kmem_cache_destroy(cifs_req_cachep);
@@ -860,9 +874,9 @@ static int cifs_oplock_thread(void * dummyarg)
860 DeleteOplockQEntry(oplock_item); 874 DeleteOplockQEntry(oplock_item);
861 /* can not grab inode sem here since it would 875 /* can not grab inode sem here since it would
862 deadlock when oplock received on delete 876 deadlock when oplock received on delete
863 since vfs_unlink holds the i_sem across 877 since vfs_unlink holds the i_mutex across
864 the call */ 878 the call */
865 /* down(&inode->i_sem);*/ 879 /* mutex_lock(&inode->i_mutex);*/
866 if (S_ISREG(inode->i_mode)) { 880 if (S_ISREG(inode->i_mode)) {
867 rc = filemap_fdatawrite(inode->i_mapping); 881 rc = filemap_fdatawrite(inode->i_mapping);
868 if(CIFS_I(inode)->clientCanCacheRead == 0) { 882 if(CIFS_I(inode)->clientCanCacheRead == 0) {
@@ -871,7 +885,7 @@ static int cifs_oplock_thread(void * dummyarg)
871 } 885 }
872 } else 886 } else
873 rc = 0; 887 rc = 0;
874 /* up(&inode->i_sem);*/ 888 /* mutex_unlock(&inode->i_mutex);*/
875 if (rc) 889 if (rc)
876 CIFS_I(inode)->write_behind_rc = rc; 890 CIFS_I(inode)->write_behind_rc = rc;
877 cFYI(1,("Oplock flush inode %p rc %d",inode,rc)); 891 cFYI(1,("Oplock flush inode %p rc %d",inode,rc));
@@ -954,6 +968,12 @@ init_cifs(void)
954 atomic_set(&tconInfoReconnectCount, 0); 968 atomic_set(&tconInfoReconnectCount, 0);
955 969
956 atomic_set(&bufAllocCount, 0); 970 atomic_set(&bufAllocCount, 0);
971 atomic_set(&smBufAllocCount, 0);
972#ifdef CONFIG_CIFS_STATS2
973 atomic_set(&totBufAllocCount, 0);
974 atomic_set(&totSmBufAllocCount, 0);
975#endif /* CONFIG_CIFS_STATS2 */
976
957 atomic_set(&midCount, 0); 977 atomic_set(&midCount, 0);
958 GlobalCurrentXid = 0; 978 GlobalCurrentXid = 0;
959 GlobalTotalActiveXid = 0; 979 GlobalTotalActiveXid = 0;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9ec40e0e54fc..821a8eb22559 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -99,5 +99,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 99extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
100extern int cifs_ioctl (struct inode * inode, struct file * filep, 100extern int cifs_ioctl (struct inode * inode, struct file * filep,
101 unsigned int command, unsigned long arg); 101 unsigned int command, unsigned long arg);
102#define CIFS_VERSION "1.39" 102#define CIFS_VERSION "1.40"
103#endif /* _CIFSFS_H */ 103#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1ba08f8c5bc4..7bed27601ce5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -233,6 +233,8 @@ struct cifsTconInfo {
233 atomic_t num_hardlinks; 233 atomic_t num_hardlinks;
234 atomic_t num_symlinks; 234 atomic_t num_symlinks;
235 atomic_t num_locks; 235 atomic_t num_locks;
236 atomic_t num_acl_get;
237 atomic_t num_acl_set;
236#ifdef CONFIG_CIFS_STATS2 238#ifdef CONFIG_CIFS_STATS2
237 unsigned long long time_writes; 239 unsigned long long time_writes;
238 unsigned long long time_reads; 240 unsigned long long time_reads;
@@ -285,6 +287,7 @@ struct cifs_search_info {
285 unsigned endOfSearch:1; 287 unsigned endOfSearch:1;
286 unsigned emptyDir:1; 288 unsigned emptyDir:1;
287 unsigned unicode:1; 289 unsigned unicode:1;
290 unsigned smallBuf:1; /* so we know which buf_release function to call */
288}; 291};
289 292
290struct cifsFileInfo { 293struct cifsFileInfo {
@@ -420,7 +423,12 @@ struct dir_notify_req {
420#define MID_RESPONSE_RECEIVED 4 423#define MID_RESPONSE_RECEIVED 4
421#define MID_RETRY_NEEDED 8 /* session closed while this request out */ 424#define MID_RETRY_NEEDED 8 /* session closed while this request out */
422#define MID_NO_RESP_NEEDED 0x10 425#define MID_NO_RESP_NEEDED 0x10
423#define MID_SMALL_BUFFER 0x20 /* 112 byte response buffer instead of 4K */ 426
427/* Types of response buffer returned from SendReceive2 */
428#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
429#define CIFS_SMALL_BUFFER 1
430#define CIFS_LARGE_BUFFER 2
431#define CIFS_IOVEC 4 /* array of response buffers */
424 432
425/* 433/*
426 ***************************************************************** 434 *****************************************************************
@@ -505,8 +513,12 @@ GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
505GLOBAL_EXTERN atomic_t tconInfoReconnectCount; 513GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
506 514
507/* Various Debug counters to remove someday (BB) */ 515/* Various Debug counters to remove someday (BB) */
508GLOBAL_EXTERN atomic_t bufAllocCount; 516GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */
509GLOBAL_EXTERN atomic_t smBufAllocCount; 517#ifdef CONFIG_CIFS_STATS2
518GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */
519GLOBAL_EXTERN atomic_t totSmBufAllocCount;
520#endif
521GLOBAL_EXTERN atomic_t smBufAllocCount;
510GLOBAL_EXTERN atomic_t midCount; 522GLOBAL_EXTERN atomic_t midCount;
511 523
512/* Misc globals */ 524/* Misc globals */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 33e1859fd2f6..cc2471094ca5 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifspdu.h 2 * fs/cifs/cifspdu.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002 4 * Copyright (c) International Business Machines Corp., 2002,2005
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -80,7 +80,11 @@
80#define NT_TRANSACT_GET_USER_QUOTA 0x07 80#define NT_TRANSACT_GET_USER_QUOTA 0x07
81#define NT_TRANSACT_SET_USER_QUOTA 0x08 81#define NT_TRANSACT_SET_USER_QUOTA 0x08
82 82
83#define MAX_CIFS_HDR_SIZE 256 /* is future chained NTCreateXReadX bigger? */ 83#define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */
84/* future chained NTCreateXReadX bigger, but for time being NTCreateX biggest */
85/* among the requests (NTCreateX response is bigger with wct of 34) */
86#define MAX_CIFS_HDR_SIZE 0x58 /* 4 len + 32 hdr + (2*24 wct) + 2 bct + 2 pad */
87#define CIFS_SMALL_PATH 120 /* allows for (448-88)/3 */
84 88
85/* internal cifs vfs structures */ 89/* internal cifs vfs structures */
86/***************************************************************** 90/*****************************************************************
@@ -524,7 +528,7 @@ typedef union smb_com_session_setup_andx {
524 /* STRING PrimaryDomain */ 528 /* STRING PrimaryDomain */
525 /* STRING NativeOS */ 529 /* STRING NativeOS */
526 /* STRING NativeLanMan */ 530 /* STRING NativeLanMan */
527 } __attribute__((packed)) old_req; /* pre-NTLM (LANMAN2.1) request format */ 531 } __attribute__((packed)) old_req; /* pre-NTLM (LANMAN2.1) req format */
528 532
529 struct { /* default (NTLM) response format */ 533 struct { /* default (NTLM) response format */
530 struct smb_hdr hdr; /* wct = 3 */ 534 struct smb_hdr hdr; /* wct = 3 */
@@ -536,7 +540,7 @@ typedef union smb_com_session_setup_andx {
536 unsigned char NativeOS[1]; /* followed by */ 540 unsigned char NativeOS[1]; /* followed by */
537/* unsigned char * NativeLanMan; */ 541/* unsigned char * NativeLanMan; */
538/* unsigned char * PrimaryDomain; */ 542/* unsigned char * PrimaryDomain; */
539 } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response format */ 543 } __attribute__((packed)) old_resp; /* pre-NTLM (LANMAN2.1) response */
540} __attribute__((packed)) SESSION_SETUP_ANDX; 544} __attribute__((packed)) SESSION_SETUP_ANDX;
541 545
542#define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux" 546#define CIFS_NETWORK_OPSYS "CIFS VFS Client for Linux"
@@ -1003,10 +1007,49 @@ typedef struct smb_com_setattr_rsp {
1003 1007
1004/* empty wct response to setattr */ 1008/* empty wct response to setattr */
1005 1009
1006/***************************************************/ 1010/*******************************************************/
1007/* NT Transact structure definitions follow */ 1011/* NT Transact structure definitions follow */
1008/* Currently only ioctl and notify are implemented */ 1012/* Currently only ioctl, acl (get security descriptor) */
1009/***************************************************/ 1013/* and notify are implemented */
1014/*******************************************************/
1015typedef struct smb_com_ntransact_req {
1016 struct smb_hdr hdr; /* wct >= 19 */
1017 __u8 MaxSetupCount;
1018 __u16 Reserved;
1019 __le32 TotalParameterCount;
1020 __le32 TotalDataCount;
1021 __le32 MaxParameterCount;
1022 __le32 MaxDataCount;
1023 __le32 ParameterCount;
1024 __le32 ParameterOffset;
1025 __le32 DataCount;
1026 __le32 DataOffset;
1027 __u8 SetupCount; /* four setup words follow subcommand */
1028 /* SNIA spec incorrectly included spurious pad here */
1029 __le16 SubCommand; /* 2 = IOCTL/FSCTL */
1030 /* SetupCount words follow then */
1031 __le16 ByteCount;
1032 __u8 Pad[3];
1033 __u8 Parms[0];
1034} __attribute__((packed)) NTRANSACT_REQ;
1035
1036typedef struct smb_com_ntransact_rsp {
1037 struct smb_hdr hdr; /* wct = 18 */
1038 __u8 Reserved[3];
1039 __le32 TotalParameterCount;
1040 __le32 TotalDataCount;
1041 __le32 ParameterCount;
1042 __le32 ParameterOffset;
1043 __le32 ParameterDisplacement;
1044 __le32 DataCount;
1045 __le32 DataOffset;
1046 __le32 DataDisplacement;
1047 __u8 SetupCount; /* 0 */
1048 __u16 ByteCount;
1049 /* __u8 Pad[3]; */
1050 /* parms and data follow */
1051} __attribute__((packed)) NTRANSACT_RSP;
1052
1010typedef struct smb_com_transaction_ioctl_req { 1053typedef struct smb_com_transaction_ioctl_req {
1011 struct smb_hdr hdr; /* wct = 23 */ 1054 struct smb_hdr hdr; /* wct = 23 */
1012 __u8 MaxSetupCount; 1055 __u8 MaxSetupCount;
@@ -1021,11 +1064,11 @@ typedef struct smb_com_transaction_ioctl_req {
1021 __le32 DataOffset; 1064 __le32 DataOffset;
1022 __u8 SetupCount; /* four setup words follow subcommand */ 1065 __u8 SetupCount; /* four setup words follow subcommand */
1023 /* SNIA spec incorrectly included spurious pad here */ 1066 /* SNIA spec incorrectly included spurious pad here */
1024 __le16 SubCommand;/* 2 = IOCTL/FSCTL */ 1067 __le16 SubCommand; /* 2 = IOCTL/FSCTL */
1025 __le32 FunctionCode; 1068 __le32 FunctionCode;
1026 __u16 Fid; 1069 __u16 Fid;
1027 __u8 IsFsctl; /* 1 = File System Control, 0 = device control (IOCTL)*/ 1070 __u8 IsFsctl; /* 1 = File System Control 0 = device control (IOCTL) */
1028 __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS share)*/ 1071 __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS) */
1029 __le16 ByteCount; 1072 __le16 ByteCount;
1030 __u8 Pad[3]; 1073 __u8 Pad[3];
1031 __u8 Data[1]; 1074 __u8 Data[1];
@@ -1045,9 +1088,35 @@ typedef struct smb_com_transaction_ioctl_rsp {
1045 __u8 SetupCount; /* 1 */ 1088 __u8 SetupCount; /* 1 */
1046 __le16 ReturnedDataLen; 1089 __le16 ReturnedDataLen;
1047 __u16 ByteCount; 1090 __u16 ByteCount;
1048 __u8 Pad[3];
1049} __attribute__((packed)) TRANSACT_IOCTL_RSP; 1091} __attribute__((packed)) TRANSACT_IOCTL_RSP;
1050 1092
1093#define CIFS_ACL_OWNER 1
1094#define CIFS_ACL_GROUP 2
1095#define CIFS_ACL_DACL 4
1096#define CIFS_ACL_SACL 8
1097
1098typedef struct smb_com_transaction_qsec_req {
1099 struct smb_hdr hdr; /* wct = 19 */
1100 __u8 MaxSetupCount;
1101 __u16 Reserved;
1102 __le32 TotalParameterCount;
1103 __le32 TotalDataCount;
1104 __le32 MaxParameterCount;
1105 __le32 MaxDataCount;
1106 __le32 ParameterCount;
1107 __le32 ParameterOffset;
1108 __le32 DataCount;
1109 __le32 DataOffset;
1110 __u8 SetupCount; /* no setup words follow subcommand */
1111 /* SNIA spec incorrectly included spurious pad here */
1112 __le16 SubCommand; /* 6 = QUERY_SECURITY_DESC */
1113 __le16 ByteCount; /* bcc = 3 + 8 */
1114 __u8 Pad[3];
1115 __u16 Fid;
1116 __u16 Reserved2;
1117 __le32 AclFlags;
1118} __attribute__((packed)) QUERY_SEC_DESC_REQ;
1119
1051typedef struct smb_com_transaction_change_notify_req { 1120typedef struct smb_com_transaction_change_notify_req {
1052 struct smb_hdr hdr; /* wct = 23 */ 1121 struct smb_hdr hdr; /* wct = 23 */
1053 __u8 MaxSetupCount; 1122 __u8 MaxSetupCount;
@@ -1068,10 +1137,12 @@ typedef struct smb_com_transaction_change_notify_req {
1068 __u8 WatchTree; /* 1 = Monitor subdirectories */ 1137 __u8 WatchTree; /* 1 = Monitor subdirectories */
1069 __u8 Reserved2; 1138 __u8 Reserved2;
1070 __le16 ByteCount; 1139 __le16 ByteCount;
1071/* __u8 Pad[3];*/ 1140/* __u8 Pad[3];*/
1072/* __u8 Data[1];*/ 1141/* __u8 Data[1];*/
1073} __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ; 1142} __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ;
1074 1143
1144/* BB eventually change to use generic ntransact rsp struct
1145 and validation routine */
1075typedef struct smb_com_transaction_change_notify_rsp { 1146typedef struct smb_com_transaction_change_notify_rsp {
1076 struct smb_hdr hdr; /* wct = 18 */ 1147 struct smb_hdr hdr; /* wct = 18 */
1077 __u8 Reserved[3]; 1148 __u8 Reserved[3];
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1b73f4f4c5ce..3c03aadaff0c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -48,8 +48,8 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
48 struct smb_hdr * /* out */ , 48 struct smb_hdr * /* out */ ,
49 int * /* bytes returned */ , const int long_op); 49 int * /* bytes returned */ , const int long_op);
50extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, 50extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
51 struct kvec *, int /* nvec */, 51 struct kvec *, int /* nvec to send */,
52 int * /* bytes returned */ , const int long_op); 52 int * /* type of buf returned */ , const int long_op);
53extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid); 53extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid);
54extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length); 54extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length);
55extern int is_valid_oplock_break(struct smb_hdr *smb); 55extern int is_valid_oplock_break(struct smb_hdr *smb);
@@ -93,11 +93,12 @@ extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
93 const struct nls_table *); 93 const struct nls_table *);
94 94
95extern int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon, 95extern int CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
96 const char *searchName, const struct nls_table *nls_codepage, 96 const char *searchName, const struct nls_table *nls_codepage,
97 __u16 *searchHandle, struct cifs_search_info * psrch_inf, int map, const char dirsep); 97 __u16 *searchHandle, struct cifs_search_info * psrch_inf,
98 int map, const char dirsep);
98 99
99extern int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, 100extern int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
100 __u16 searchHandle, struct cifs_search_info * psrch_inf); 101 __u16 searchHandle, struct cifs_search_info * psrch_inf);
101 102
102extern int CIFSFindClose(const int, struct cifsTconInfo *tcon, 103extern int CIFSFindClose(const int, struct cifsTconInfo *tcon,
103 const __u16 search_handle); 104 const __u16 search_handle);
@@ -230,19 +231,18 @@ extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
230 const int smb_file_id); 231 const int smb_file_id);
231 232
232extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, 233extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
233 const int netfid, unsigned int count, 234 const int netfid, unsigned int count,
234 const __u64 lseek, unsigned int *nbytes, char **buf); 235 const __u64 lseek, unsigned int *nbytes, char **buf,
236 int * return_buf_type);
235extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, 237extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
236 const int netfid, const unsigned int count, 238 const int netfid, const unsigned int count,
237 const __u64 lseek, unsigned int *nbytes, 239 const __u64 lseek, unsigned int *nbytes,
238 const char *buf, const char __user *ubuf, 240 const char *buf, const char __user *ubuf,
239 const int long_op); 241 const int long_op);
240#ifdef CONFIG_CIFS_EXPERIMENTAL
241extern int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, 242extern int CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
242 const int netfid, const unsigned int count, 243 const int netfid, const unsigned int count,
243 const __u64 offset, unsigned int *nbytes, 244 const __u64 offset, unsigned int *nbytes,
244 struct kvec *iov, const int nvec, const int long_op); 245 struct kvec *iov, const int nvec, const int long_op);
245#endif /* CONFIG_CIFS_EXPERIMENTAL */
246extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon, 246extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
247 const unsigned char *searchName, __u64 * inode_number, 247 const unsigned char *searchName, __u64 * inode_number,
248 const struct nls_table *nls_codepage, 248 const struct nls_table *nls_codepage,
@@ -269,6 +269,8 @@ extern void tconInfoFree(struct cifsTconInfo *);
269extern int cifs_reconnect(struct TCP_Server_Info *server); 269extern int cifs_reconnect(struct TCP_Server_Info *server);
270 270
271extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *,__u32 *); 271extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *,__u32 *);
272extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
273 __u32 *);
272extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key, 274extern int cifs_verify_signature(struct smb_hdr *, const char * mac_key,
273 __u32 expected_sequence_number); 275 __u32 expected_sequence_number);
274extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass); 276extern int cifs_calculate_mac_key(char * key,const char * rn,const char * pass);
@@ -297,6 +299,9 @@ extern int CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon,
297 const char *fileName, const char * ea_name, 299 const char *fileName, const char * ea_name,
298 const void * ea_value, const __u16 ea_value_len, 300 const void * ea_value, const __u16 ea_value_len,
299 const struct nls_table *nls_codepage, int remap_special_chars); 301 const struct nls_table *nls_codepage, int remap_special_chars);
302extern int CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon,
303 __u16 fid, char *acl_inf, const int buflen,
304 const int acl_type /* ACCESS vs. DEFAULT */);
300extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon, 305extern int CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon,
301 const unsigned char *searchName, 306 const unsigned char *searchName,
302 char *acl_inf, const int buflen,const int acl_type, 307 char *acl_inf, const int buflen,const int acl_type,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6867e556d37e..217323b0c896 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -37,6 +37,7 @@
37#include "cifsproto.h" 37#include "cifsproto.h"
38#include "cifs_unicode.h" 38#include "cifs_unicode.h"
39#include "cifs_debug.h" 39#include "cifs_debug.h"
40#include "cifsacl.h"
40 41
41#ifdef CONFIG_CIFS_POSIX 42#ifdef CONFIG_CIFS_POSIX
42static struct { 43static struct {
@@ -372,8 +373,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
372 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 373 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
373 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 374 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
374 if (rc == 0) { 375 if (rc == 0) {
375 server->secMode = pSMBr->SecurityMode; 376 server->secMode = pSMBr->SecurityMode;
376 server->secType = NTLM; /* BB override default for 377 if((server->secMode & SECMODE_USER) == 0)
378 cFYI(1,("share mode security"));
379 server->secType = NTLM; /* BB override default for
377 NTLMv2 or kerberos v5 */ 380 NTLMv2 or kerberos v5 */
378 /* one byte - no need to convert this or EncryptionKeyLen 381 /* one byte - no need to convert this or EncryptionKeyLen
379 from little endian */ 382 from little endian */
@@ -383,7 +386,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
383 min(le32_to_cpu(pSMBr->MaxBufferSize), 386 min(le32_to_cpu(pSMBr->MaxBufferSize),
384 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 387 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
385 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize); 388 server->maxRw = le32_to_cpu(pSMBr->MaxRawSize);
386 cFYI(0, ("Max buf = %d ", ses->server->maxBuf)); 389 cFYI(0, ("Max buf = %d", ses->server->maxBuf));
387 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); 390 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
388 server->capabilities = le32_to_cpu(pSMBr->Capabilities); 391 server->capabilities = le32_to_cpu(pSMBr->Capabilities);
389 server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone); 392 server->timeZone = le16_to_cpu(pSMBr->ServerTimeZone);
@@ -411,8 +414,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
411 (server->server_GUID, 414 (server->server_GUID,
412 pSMBr->u.extended_response. 415 pSMBr->u.extended_response.
413 GUID, 16) != 0) { 416 GUID, 16) != 0) {
414 cFYI(1, 417 cFYI(1, ("server UID changed"));
415 ("UID of server does not match previous connection to same ip address"));
416 memcpy(server-> 418 memcpy(server->
417 server_GUID, 419 server_GUID,
418 pSMBr->u. 420 pSMBr->u.
@@ -958,21 +960,19 @@ openRetry:
958 return rc; 960 return rc;
959} 961}
960 962
961/* If no buffer passed in, then caller wants to do the copy
962 as in the case of readpages so the SMB buffer must be
963 freed by the caller */
964
965int 963int
966CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, 964CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
967 const int netfid, const unsigned int count, 965 const int netfid, const unsigned int count,
968 const __u64 lseek, unsigned int *nbytes, char **buf) 966 const __u64 lseek, unsigned int *nbytes, char **buf,
967 int * pbuf_type)
969{ 968{
970 int rc = -EACCES; 969 int rc = -EACCES;
971 READ_REQ *pSMB = NULL; 970 READ_REQ *pSMB = NULL;
972 READ_RSP *pSMBr = NULL; 971 READ_RSP *pSMBr = NULL;
973 char *pReadData = NULL; 972 char *pReadData = NULL;
974 int bytes_returned;
975 int wct; 973 int wct;
974 int resp_buf_type = 0;
975 struct kvec iov[1];
976 976
977 cFYI(1,("Reading %d bytes on fid %d",count,netfid)); 977 cFYI(1,("Reading %d bytes on fid %d",count,netfid));
978 if(tcon->ses->capabilities & CAP_LARGE_FILES) 978 if(tcon->ses->capabilities & CAP_LARGE_FILES)
@@ -981,8 +981,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
981 wct = 10; /* old style read */ 981 wct = 10; /* old style read */
982 982
983 *nbytes = 0; 983 *nbytes = 0;
984 rc = smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB, 984 rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **) &pSMB);
985 (void **) &pSMBr);
986 if (rc) 985 if (rc)
987 return rc; 986 return rc;
988 987
@@ -990,13 +989,13 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
990 if (tcon->ses->server == NULL) 989 if (tcon->ses->server == NULL)
991 return -ECONNABORTED; 990 return -ECONNABORTED;
992 991
993 pSMB->AndXCommand = 0xFF; /* none */ 992 pSMB->AndXCommand = 0xFF; /* none */
994 pSMB->Fid = netfid; 993 pSMB->Fid = netfid;
995 pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF); 994 pSMB->OffsetLow = cpu_to_le32(lseek & 0xFFFFFFFF);
996 if(wct == 12) 995 if(wct == 12)
997 pSMB->OffsetHigh = cpu_to_le32(lseek >> 32); 996 pSMB->OffsetHigh = cpu_to_le32(lseek >> 32);
998 else if((lseek >> 32) > 0) /* can not handle this big offset for old */ 997 else if((lseek >> 32) > 0) /* can not handle this big offset for old */
999 return -EIO; 998 return -EIO;
1000 999
1001 pSMB->Remaining = 0; 1000 pSMB->Remaining = 0;
1002 pSMB->MaxCount = cpu_to_le16(count & 0xFFFF); 1001 pSMB->MaxCount = cpu_to_le16(count & 0xFFFF);
@@ -1005,14 +1004,18 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
1005 pSMB->ByteCount = 0; /* no need to do le conversion since 0 */ 1004 pSMB->ByteCount = 0; /* no need to do le conversion since 0 */
1006 else { 1005 else {
1007 /* old style read */ 1006 /* old style read */
1008 struct smb_com_readx_req * pSMBW = 1007 struct smb_com_readx_req * pSMBW =
1009 (struct smb_com_readx_req *)pSMB; 1008 (struct smb_com_readx_req *)pSMB;
1010 pSMBW->ByteCount = 0; 1009 pSMBW->ByteCount = 0;
1011 } 1010 }
1012 1011
1013 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1012 iov[0].iov_base = (char *)pSMB;
1014 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1013 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
1014 rc = SendReceive2(xid, tcon->ses, iov,
1015 1 /* num iovecs */,
1016 &resp_buf_type, 0);
1015 cifs_stats_inc(&tcon->num_reads); 1017 cifs_stats_inc(&tcon->num_reads);
1018 pSMBr = (READ_RSP *)iov[0].iov_base;
1016 if (rc) { 1019 if (rc) {
1017 cERROR(1, ("Send error in read = %d", rc)); 1020 cERROR(1, ("Send error in read = %d", rc));
1018 } else { 1021 } else {
@@ -1022,33 +1025,43 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
1022 *nbytes = data_length; 1025 *nbytes = data_length;
1023 1026
1024 /*check that DataLength would not go beyond end of SMB */ 1027 /*check that DataLength would not go beyond end of SMB */
1025 if ((data_length > CIFSMaxBufSize) 1028 if ((data_length > CIFSMaxBufSize)
1026 || (data_length > count)) { 1029 || (data_length > count)) {
1027 cFYI(1,("bad length %d for count %d",data_length,count)); 1030 cFYI(1,("bad length %d for count %d",data_length,count));
1028 rc = -EIO; 1031 rc = -EIO;
1029 *nbytes = 0; 1032 *nbytes = 0;
1030 } else { 1033 } else {
1031 pReadData = 1034 pReadData = (char *) (&pSMBr->hdr.Protocol) +
1032 (char *) (&pSMBr->hdr.Protocol) +
1033 le16_to_cpu(pSMBr->DataOffset); 1035 le16_to_cpu(pSMBr->DataOffset);
1034/* if(rc = copy_to_user(buf, pReadData, data_length)) { 1036/* if(rc = copy_to_user(buf, pReadData, data_length)) {
1035 cERROR(1,("Faulting on read rc = %d",rc)); 1037 cERROR(1,("Faulting on read rc = %d",rc));
1036 rc = -EFAULT; 1038 rc = -EFAULT;
1037 }*/ /* can not use copy_to_user when using page cache*/ 1039 }*/ /* can not use copy_to_user when using page cache*/
1038 if(*buf) 1040 if(*buf)
1039 memcpy(*buf,pReadData,data_length); 1041 memcpy(*buf,pReadData,data_length);
1040 } 1042 }
1041 } 1043 }
1042 if(*buf)
1043 cifs_buf_release(pSMB);
1044 else
1045 *buf = (char *)pSMB;
1046 1044
1047 /* Note: On -EAGAIN error only caller can retry on handle based calls 1045 cifs_small_buf_release(pSMB);
1046 if(*buf) {
1047 if(resp_buf_type == CIFS_SMALL_BUFFER)
1048 cifs_small_buf_release(iov[0].iov_base);
1049 else if(resp_buf_type == CIFS_LARGE_BUFFER)
1050 cifs_buf_release(iov[0].iov_base);
1051 } else /* return buffer to caller to free */ /* BB FIXME how do we tell caller if it is not a large buffer */ {
1052 *buf = iov[0].iov_base;
1053 if(resp_buf_type == CIFS_SMALL_BUFFER)
1054 *pbuf_type = CIFS_SMALL_BUFFER;
1055 else if(resp_buf_type == CIFS_LARGE_BUFFER)
1056 *pbuf_type = CIFS_LARGE_BUFFER;
1057 }
1058
1059 /* Note: On -EAGAIN error only caller can retry on handle based calls
1048 since file handle passed in no longer valid */ 1060 since file handle passed in no longer valid */
1049 return rc; 1061 return rc;
1050} 1062}
1051 1063
1064
1052int 1065int
1053CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, 1066CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1054 const int netfid, const unsigned int count, 1067 const int netfid, const unsigned int count,
@@ -1155,7 +1168,6 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1155 return rc; 1168 return rc;
1156} 1169}
1157 1170
1158#ifdef CONFIG_CIFS_EXPERIMENTAL
1159int 1171int
1160CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, 1172CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1161 const int netfid, const unsigned int count, 1173 const int netfid, const unsigned int count,
@@ -1164,10 +1176,10 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1164{ 1176{
1165 int rc = -EACCES; 1177 int rc = -EACCES;
1166 WRITE_REQ *pSMB = NULL; 1178 WRITE_REQ *pSMB = NULL;
1167 int bytes_returned, wct; 1179 int wct;
1168 int smb_hdr_len; 1180 int smb_hdr_len;
1181 int resp_buf_type = 0;
1169 1182
1170 /* BB removeme BB */
1171 cFYI(1,("write2 at %lld %d bytes", (long long)offset, count)); 1183 cFYI(1,("write2 at %lld %d bytes", (long long)offset, count));
1172 1184
1173 if(tcon->ses->capabilities & CAP_LARGE_FILES) 1185 if(tcon->ses->capabilities & CAP_LARGE_FILES)
@@ -1210,22 +1222,34 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1210 pSMBW->ByteCount = cpu_to_le16(count + 5); 1222 pSMBW->ByteCount = cpu_to_le16(count + 5);
1211 } 1223 }
1212 iov[0].iov_base = pSMB; 1224 iov[0].iov_base = pSMB;
1213 iov[0].iov_len = smb_hdr_len + 4; 1225 if(wct == 14)
1226 iov[0].iov_len = smb_hdr_len + 4;
1227 else /* wct == 12 pad bigger by four bytes */
1228 iov[0].iov_len = smb_hdr_len + 8;
1229
1214 1230
1215 rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &bytes_returned, 1231 rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type,
1216 long_op); 1232 long_op);
1217 cifs_stats_inc(&tcon->num_writes); 1233 cifs_stats_inc(&tcon->num_writes);
1218 if (rc) { 1234 if (rc) {
1219 cFYI(1, ("Send error Write2 = %d", rc)); 1235 cFYI(1, ("Send error Write2 = %d", rc));
1220 *nbytes = 0; 1236 *nbytes = 0;
1237 } else if(resp_buf_type == 0) {
1238 /* presumably this can not happen, but best to be safe */
1239 rc = -EIO;
1240 *nbytes = 0;
1221 } else { 1241 } else {
1222 WRITE_RSP * pSMBr = (WRITE_RSP *)pSMB; 1242 WRITE_RSP * pSMBr = (WRITE_RSP *)iov[0].iov_base;
1223 *nbytes = le16_to_cpu(pSMBr->CountHigh); 1243 *nbytes = le16_to_cpu(pSMBr->CountHigh);
1224 *nbytes = (*nbytes) << 16; 1244 *nbytes = (*nbytes) << 16;
1225 *nbytes += le16_to_cpu(pSMBr->Count); 1245 *nbytes += le16_to_cpu(pSMBr->Count);
1226 } 1246 }
1227 1247
1228 cifs_small_buf_release(pSMB); 1248 cifs_small_buf_release(pSMB);
1249 if(resp_buf_type == CIFS_SMALL_BUFFER)
1250 cifs_small_buf_release(iov[0].iov_base);
1251 else if(resp_buf_type == CIFS_LARGE_BUFFER)
1252 cifs_buf_release(iov[0].iov_base);
1229 1253
1230 /* Note: On -EAGAIN error only caller can retry on handle based calls 1254 /* Note: On -EAGAIN error only caller can retry on handle based calls
1231 since file handle passed in no longer valid */ 1255 since file handle passed in no longer valid */
@@ -1234,8 +1258,6 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1234} 1258}
1235 1259
1236 1260
1237#endif /* CIFS_EXPERIMENTAL */
1238
1239int 1261int
1240CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 1262CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1241 const __u16 smb_file_id, const __u64 len, 1263 const __u16 smb_file_id, const __u64 len,
@@ -1906,6 +1928,90 @@ querySymLinkRetry:
1906 return rc; 1928 return rc;
1907} 1929}
1908 1930
1931/* Initialize NT TRANSACT SMB into small smb request buffer.
1932 This assumes that all NT TRANSACTS that we init here have
1933 total parm and data under about 400 bytes (to fit in small cifs
1934 buffer size), which is the case so far, it easily fits. NB:
1935 Setup words themselves and ByteCount
1936 MaxSetupCount (size of returned setup area) and
1937 MaxParameterCount (returned parms size) must be set by caller */
1938static int
1939smb_init_ntransact(const __u16 sub_command, const int setup_count,
1940 const int parm_len, struct cifsTconInfo *tcon,
1941 void ** ret_buf)
1942{
1943 int rc;
1944 __u32 temp_offset;
1945 struct smb_com_ntransact_req * pSMB;
1946
1947 rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
1948 (void **)&pSMB);
1949 if (rc)
1950 return rc;
1951 *ret_buf = (void *)pSMB;
1952 pSMB->Reserved = 0;
1953 pSMB->TotalParameterCount = cpu_to_le32(parm_len);
1954 pSMB->TotalDataCount = 0;
1955 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
1956 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
1957 pSMB->ParameterCount = pSMB->TotalParameterCount;
1958 pSMB->DataCount = pSMB->TotalDataCount;
1959 temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
1960 (setup_count * 2) - 4 /* for rfc1001 length itself */;
1961 pSMB->ParameterOffset = cpu_to_le32(temp_offset);
1962 pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
1963 pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
1964 pSMB->SubCommand = cpu_to_le16(sub_command);
1965 return 0;
1966}
1967
1968static int
1969validate_ntransact(char * buf, char ** ppparm, char ** ppdata,
1970 int * pdatalen, int * pparmlen)
1971{
1972 char * end_of_smb;
1973 __u32 data_count, data_offset, parm_count, parm_offset;
1974 struct smb_com_ntransact_rsp * pSMBr;
1975
1976 if(buf == NULL)
1977 return -EINVAL;
1978
1979 pSMBr = (struct smb_com_ntransact_rsp *)buf;
1980
1981 /* ByteCount was converted from little endian in SendReceive */
1982 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
1983 (char *)&pSMBr->ByteCount;
1984
1985
1986 data_offset = le32_to_cpu(pSMBr->DataOffset);
1987 data_count = le32_to_cpu(pSMBr->DataCount);
1988 parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
1989 parm_count = le32_to_cpu(pSMBr->ParameterCount);
1990
1991 *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
1992 *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
1993
1994 /* should we also check that parm and data areas do not overlap? */
1995 if(*ppparm > end_of_smb) {
1996 cFYI(1,("parms start after end of smb"));
1997 return -EINVAL;
1998 } else if(parm_count + *ppparm > end_of_smb) {
1999 cFYI(1,("parm end after end of smb"));
2000 return -EINVAL;
2001 } else if(*ppdata > end_of_smb) {
2002 cFYI(1,("data starts after end of smb"));
2003 return -EINVAL;
2004 } else if(data_count + *ppdata > end_of_smb) {
2005 cFYI(1,("data %p + count %d (%p) ends after end of smb %p start %p",
2006 *ppdata, data_count, (data_count + *ppdata), end_of_smb, pSMBr)); /* BB FIXME */
2007 return -EINVAL;
2008 } else if(parm_count + data_count > pSMBr->ByteCount) {
2009 cFYI(1,("parm count and data count larger than SMB"));
2010 return -EINVAL;
2011 }
2012 return 0;
2013}
2014
1909int 2015int
1910CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2016CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
1911 const unsigned char *searchName, 2017 const unsigned char *searchName,
@@ -1928,7 +2034,8 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
1928 pSMB->TotalDataCount = 0; 2034 pSMB->TotalDataCount = 0;
1929 pSMB->MaxParameterCount = cpu_to_le32(2); 2035 pSMB->MaxParameterCount = cpu_to_le32(2);
1930 /* BB find exact data count max from sess structure BB */ 2036 /* BB find exact data count max from sess structure BB */
1931 pSMB->MaxDataCount = cpu_to_le32(4000); 2037 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
2038 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
1932 pSMB->MaxSetupCount = 4; 2039 pSMB->MaxSetupCount = 4;
1933 pSMB->Reserved = 0; 2040 pSMB->Reserved = 0;
1934 pSMB->ParameterOffset = 0; 2041 pSMB->ParameterOffset = 0;
@@ -1955,7 +2062,9 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
1955 rc = -EIO; /* bad smb */ 2062 rc = -EIO; /* bad smb */
1956 else { 2063 else {
1957 if(data_count && (data_count < 2048)) { 2064 if(data_count && (data_count < 2048)) {
1958 char * end_of_smb = pSMBr->ByteCount + (char *)&pSMBr->ByteCount; 2065 char * end_of_smb = 2 /* sizeof byte count */ +
2066 pSMBr->ByteCount +
2067 (char *)&pSMBr->ByteCount;
1959 2068
1960 struct reparse_data * reparse_buf = (struct reparse_data *) 2069 struct reparse_data * reparse_buf = (struct reparse_data *)
1961 ((char *)&pSMBr->hdr.Protocol + data_offset); 2070 ((char *)&pSMBr->hdr.Protocol + data_offset);
@@ -2199,6 +2308,7 @@ queryAclRetry:
2199 2308
2200 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2309 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2201 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2310 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2311 cifs_stats_inc(&tcon->num_acl_get);
2202 if (rc) { 2312 if (rc) {
2203 cFYI(1, ("Send error in Query POSIX ACL = %d", rc)); 2313 cFYI(1, ("Send error in Query POSIX ACL = %d", rc));
2204 } else { 2314 } else {
@@ -2386,6 +2496,92 @@ GetExtAttrOut:
2386 2496
2387#endif /* CONFIG_POSIX */ 2497#endif /* CONFIG_POSIX */
2388 2498
2499
2500/* security id for everyone */
2501const struct cifs_sid sid_everyone = {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
2502/* group users */
2503const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
2504
2505/* Convert CIFS ACL to POSIX form */
2506static int parse_sec_desc(struct cifs_sid * psec_desc, int acl_len)
2507{
2508 return 0;
2509}
2510
2511/* Get Security Descriptor (by handle) from remote server for a file or dir */
2512int
2513CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
2514 /* BB fix up return info */ char *acl_inf, const int buflen,
2515 const int acl_type /* ACCESS/DEFAULT not sure implication */)
2516{
2517 int rc = 0;
2518 int buf_type = 0;
2519 QUERY_SEC_DESC_REQ * pSMB;
2520 struct kvec iov[1];
2521
2522 cFYI(1, ("GetCifsACL"));
2523
2524 rc = smb_init_ntransact(NT_TRANSACT_QUERY_SECURITY_DESC, 0,
2525 8 /* parm len */, tcon, (void **) &pSMB);
2526 if (rc)
2527 return rc;
2528
2529 pSMB->MaxParameterCount = cpu_to_le32(4);
2530 /* BB TEST with big acls that might need to be e.g. larger than 16K */
2531 pSMB->MaxSetupCount = 0;
2532 pSMB->Fid = fid; /* file handle always le */
2533 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
2534 CIFS_ACL_DACL);
2535 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
2536 pSMB->hdr.smb_buf_length += 11;
2537 iov[0].iov_base = (char *)pSMB;
2538 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
2539
2540 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0);
2541 cifs_stats_inc(&tcon->num_acl_get);
2542 if (rc) {
2543 cFYI(1, ("Send error in QuerySecDesc = %d", rc));
2544 } else { /* decode response */
2545 struct cifs_sid * psec_desc;
2546 __le32 * parm;
2547 int parm_len;
2548 int data_len;
2549 int acl_len;
2550 struct smb_com_ntransact_rsp * pSMBr;
2551
2552/* validate_nttransact */
2553 rc = validate_ntransact(iov[0].iov_base, (char **)&parm,
2554 (char **)&psec_desc,
2555 &parm_len, &data_len);
2556
2557 if(rc)
2558 goto qsec_out;
2559 pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
2560
2561 cERROR(1,("smb %p parm %p data %p",pSMBr,parm,psec_desc)); /* BB removeme BB */
2562
2563 if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
2564 rc = -EIO; /* bad smb */
2565 goto qsec_out;
2566 }
2567
2568/* BB check that data area is minimum length and as big as acl_len */
2569
2570 acl_len = le32_to_cpu(*(__le32 *)parm);
2571 /* BB check if(acl_len > bufsize) */
2572
2573 parse_sec_desc(psec_desc, acl_len);
2574 }
2575qsec_out:
2576 if(buf_type == CIFS_SMALL_BUFFER)
2577 cifs_small_buf_release(iov[0].iov_base);
2578 else if(buf_type == CIFS_LARGE_BUFFER)
2579 cifs_buf_release(iov[0].iov_base);
2580 cifs_small_buf_release(pSMB);
2581 return rc;
2582}
2583
2584
2389/* Legacy Query Path Information call for lookup to old servers such 2585/* Legacy Query Path Information call for lookup to old servers such
2390 as Win9x/WinME */ 2586 as Win9x/WinME */
2391int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon, 2587int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
@@ -4284,7 +4480,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
4284{ 4480{
4285 int rc = 0; 4481 int rc = 0;
4286 struct smb_com_transaction_change_notify_req * pSMB = NULL; 4482 struct smb_com_transaction_change_notify_req * pSMB = NULL;
4287 struct smb_com_transaction_change_notify_rsp * pSMBr = NULL; 4483 struct smb_com_ntransaction_change_notify_rsp * pSMBr = NULL;
4288 struct dir_notify_req *dnotify_req; 4484 struct dir_notify_req *dnotify_req;
4289 int bytes_returned; 4485 int bytes_returned;
4290 4486
@@ -4299,6 +4495,10 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
4299 pSMB->MaxParameterCount = cpu_to_le32(2); 4495 pSMB->MaxParameterCount = cpu_to_le32(2);
4300 /* BB find exact data count max from sess structure BB */ 4496 /* BB find exact data count max from sess structure BB */
4301 pSMB->MaxDataCount = 0; /* same in little endian or be */ 4497 pSMB->MaxDataCount = 0; /* same in little endian or be */
4498/* BB VERIFY verify which is correct for above BB */
4499 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
4500 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
4501
4302 pSMB->MaxSetupCount = 4; 4502 pSMB->MaxSetupCount = 4;
4303 pSMB->Reserved = 0; 4503 pSMB->Reserved = 0;
4304 pSMB->ParameterOffset = 0; 4504 pSMB->ParameterOffset = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c467de857610..88f60aa52058 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -76,12 +76,19 @@ struct smb_vol {
76 unsigned setuids:1; 76 unsigned setuids:1;
77 unsigned noperm:1; 77 unsigned noperm:1;
78 unsigned no_psx_acl:1; /* set if posix acl support should be disabled */ 78 unsigned no_psx_acl:1; /* set if posix acl support should be disabled */
79 unsigned cifs_acl:1;
79 unsigned no_xattr:1; /* set if xattr (EA) support should be disabled*/ 80 unsigned no_xattr:1; /* set if xattr (EA) support should be disabled*/
80 unsigned server_ino:1; /* use inode numbers from server ie UniqueId */ 81 unsigned server_ino:1; /* use inode numbers from server ie UniqueId */
81 unsigned direct_io:1; 82 unsigned direct_io:1;
82 unsigned remap:1; /* set to remap seven reserved chars in filenames */ 83 unsigned remap:1; /* set to remap seven reserved chars in filenames */
83 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ 84 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */
84 unsigned sfu_emul:1; 85 unsigned sfu_emul:1;
86 unsigned krb5:1;
87 unsigned ntlm:1;
88 unsigned ntlmv2:1;
89 unsigned nullauth:1; /* attempt to authenticate with null user */
90 unsigned sign:1;
91 unsigned seal:1; /* encrypt */
85 unsigned nocase; /* request case insensitive filenames */ 92 unsigned nocase; /* request case insensitive filenames */
86 unsigned nobrl; /* disable sending byte range locks to srv */ 93 unsigned nobrl; /* disable sending byte range locks to srv */
87 unsigned int rsize; 94 unsigned int rsize;
@@ -508,7 +515,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
508 /* else length ok */ 515 /* else length ok */
509 reconnect = 0; 516 reconnect = 0;
510 517
511 if(pdu_length > MAX_CIFS_HDR_SIZE - 4) { 518 if(pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) {
512 isLargeBuf = TRUE; 519 isLargeBuf = TRUE;
513 memcpy(bigbuf, smallbuf, 4); 520 memcpy(bigbuf, smallbuf, 4);
514 smb_buffer = bigbuf; 521 smb_buffer = bigbuf;
@@ -777,7 +784,7 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
777 784
778 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 785 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
779 vol->rw = TRUE; 786 vol->rw = TRUE;
780 787 vol->ntlm = TRUE;
781 /* default is always to request posix paths. */ 788 /* default is always to request posix paths. */
782 vol->posix_paths = 1; 789 vol->posix_paths = 1;
783 790
@@ -903,6 +910,39 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
903 printk(KERN_WARNING "CIFS: ip address too long\n"); 910 printk(KERN_WARNING "CIFS: ip address too long\n");
904 return 1; 911 return 1;
905 } 912 }
913 } else if (strnicmp(data, "sec", 3) == 0) {
914 if (!value || !*value) {
915 cERROR(1,("no security value specified"));
916 continue;
917 } else if (strnicmp(value, "krb5i", 5) == 0) {
918 vol->sign = 1;
919 vol->krb5 = 1;
920 } else if (strnicmp(value, "krb5p", 5) == 0) {
921 /* vol->seal = 1;
922 vol->krb5 = 1; */
923 cERROR(1,("Krb5 cifs privacy not supported"));
924 return 1;
925 } else if (strnicmp(value, "krb5", 4) == 0) {
926 vol->krb5 = 1;
927 } else if (strnicmp(value, "ntlmv2i", 7) == 0) {
928 vol->ntlmv2 = 1;
929 vol->sign = 1;
930 } else if (strnicmp(value, "ntlmv2", 6) == 0) {
931 vol->ntlmv2 = 1;
932 } else if (strnicmp(value, "ntlmi", 5) == 0) {
933 vol->ntlm = 1;
934 vol->sign = 1;
935 } else if (strnicmp(value, "ntlm", 4) == 0) {
936 /* ntlm is default so can be turned off too */
937 vol->ntlm = 1;
938 } else if (strnicmp(value, "nontlm", 6) == 0) {
939 vol->ntlm = 0;
940 } else if (strnicmp(value, "none", 4) == 0) {
941 vol->nullauth = 1;
942 } else {
943 cERROR(1,("bad security option: %s", value));
944 return 1;
945 }
906 } else if ((strnicmp(data, "unc", 3) == 0) 946 } else if ((strnicmp(data, "unc", 3) == 0)
907 || (strnicmp(data, "target", 6) == 0) 947 || (strnicmp(data, "target", 6) == 0)
908 || (strnicmp(data, "path", 4) == 0)) { 948 || (strnicmp(data, "path", 4) == 0)) {
@@ -1120,6 +1160,10 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
1120 vol->server_ino = 1; 1160 vol->server_ino = 1;
1121 } else if (strnicmp(data, "noserverino",9) == 0) { 1161 } else if (strnicmp(data, "noserverino",9) == 0) {
1122 vol->server_ino = 0; 1162 vol->server_ino = 0;
1163 } else if (strnicmp(data, "cifsacl",7) == 0) {
1164 vol->cifs_acl = 1;
1165 } else if (strnicmp(data, "nocifsacl", 9) == 0) {
1166 vol->cifs_acl = 0;
1123 } else if (strnicmp(data, "acl",3) == 0) { 1167 } else if (strnicmp(data, "acl",3) == 0) {
1124 vol->no_psx_acl = 0; 1168 vol->no_psx_acl = 0;
1125 } else if (strnicmp(data, "noacl",5) == 0) { 1169 } else if (strnicmp(data, "noacl",5) == 0) {
@@ -1546,7 +1590,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1546 cFYI(1, ("Username: %s ", volume_info.username)); 1590 cFYI(1, ("Username: %s ", volume_info.username));
1547 1591
1548 } else { 1592 } else {
1549 cifserror("No username specified "); 1593 cifserror("No username specified");
1550 /* In userspace mount helper we can get user name from alternate 1594 /* In userspace mount helper we can get user name from alternate
1551 locations such as env variables and files on disk */ 1595 locations such as env variables and files on disk */
1552 kfree(volume_info.UNC); 1596 kfree(volume_info.UNC);
@@ -1587,7 +1631,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1587 return -EINVAL; 1631 return -EINVAL;
1588 } else /* which servers DFS root would we conect to */ { 1632 } else /* which servers DFS root would we conect to */ {
1589 cERROR(1, 1633 cERROR(1,
1590 ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified ")); 1634 ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
1591 kfree(volume_info.UNC); 1635 kfree(volume_info.UNC);
1592 kfree(volume_info.password); 1636 kfree(volume_info.password);
1593 FreeXid(xid); 1637 FreeXid(xid);
@@ -1626,7 +1670,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1626 1670
1627 1671
1628 if (srvTcp) { 1672 if (srvTcp) {
1629 cFYI(1, ("Existing tcp session with server found ")); 1673 cFYI(1, ("Existing tcp session with server found"));
1630 } else { /* create socket */ 1674 } else { /* create socket */
1631 if(volume_info.port) 1675 if(volume_info.port)
1632 sin_server.sin_port = htons(volume_info.port); 1676 sin_server.sin_port = htons(volume_info.port);
@@ -1689,11 +1733,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1689 1733
1690 if (existingCifsSes) { 1734 if (existingCifsSes) {
1691 pSesInfo = existingCifsSes; 1735 pSesInfo = existingCifsSes;
1692 cFYI(1, ("Existing smb sess found ")); 1736 cFYI(1, ("Existing smb sess found"));
1693 kfree(volume_info.password); 1737 kfree(volume_info.password);
1694 /* volume_info.UNC freed at end of function */ 1738 /* volume_info.UNC freed at end of function */
1695 } else if (!rc) { 1739 } else if (!rc) {
1696 cFYI(1, ("Existing smb sess not found ")); 1740 cFYI(1, ("Existing smb sess not found"));
1697 pSesInfo = sesInfoAlloc(); 1741 pSesInfo = sesInfoAlloc();
1698 if (pSesInfo == NULL) 1742 if (pSesInfo == NULL)
1699 rc = -ENOMEM; 1743 rc = -ENOMEM;
@@ -1751,7 +1795,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1751 cifs_sb->mnt_gid = volume_info.linux_gid; 1795 cifs_sb->mnt_gid = volume_info.linux_gid;
1752 cifs_sb->mnt_file_mode = volume_info.file_mode; 1796 cifs_sb->mnt_file_mode = volume_info.file_mode;
1753 cifs_sb->mnt_dir_mode = volume_info.dir_mode; 1797 cifs_sb->mnt_dir_mode = volume_info.dir_mode;
1754 cFYI(1,("file mode: 0x%x dir mode: 0x%x",cifs_sb->mnt_file_mode,cifs_sb->mnt_dir_mode)); 1798 cFYI(1,("file mode: 0x%x dir mode: 0x%x",
1799 cifs_sb->mnt_file_mode,cifs_sb->mnt_dir_mode));
1755 1800
1756 if(volume_info.noperm) 1801 if(volume_info.noperm)
1757 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 1802 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
@@ -1767,6 +1812,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1767 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; 1812 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
1768 if(volume_info.nobrl) 1813 if(volume_info.nobrl)
1769 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; 1814 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
1815 if(volume_info.cifs_acl)
1816 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
1770 1817
1771 if(volume_info.direct_io) { 1818 if(volume_info.direct_io) {
1772 cFYI(1,("mounting share using direct i/o")); 1819 cFYI(1,("mounting share using direct i/o"));
@@ -1777,7 +1824,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1777 find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, 1824 find_unc(sin_server.sin_addr.s_addr, volume_info.UNC,
1778 volume_info.username); 1825 volume_info.username);
1779 if (tcon) { 1826 if (tcon) {
1780 cFYI(1, ("Found match on UNC path ")); 1827 cFYI(1, ("Found match on UNC path"));
1781 /* we can have only one retry value for a connection 1828 /* we can have only one retry value for a connection
1782 to a share so for resources mounted more than once 1829 to a share so for resources mounted more than once
1783 to the same server share the last value passed in 1830 to the same server share the last value passed in
@@ -1926,7 +1973,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
1926 __u32 capabilities; 1973 __u32 capabilities;
1927 __u16 count; 1974 __u16 count;
1928 1975
1929 cFYI(1, ("In sesssetup ")); 1976 cFYI(1, ("In sesssetup"));
1930 if(ses == NULL) 1977 if(ses == NULL)
1931 return -EINVAL; 1978 return -EINVAL;
1932 user = ses->userName; 1979 user = ses->userName;
@@ -3202,9 +3249,26 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3202 3249
3203 pSMB->AndXCommand = 0xFF; 3250 pSMB->AndXCommand = 0xFF;
3204 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); 3251 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
3205 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3206 bcc_ptr = &pSMB->Password[0]; 3252 bcc_ptr = &pSMB->Password[0];
3207 bcc_ptr++; /* skip password */ 3253 if((ses->server->secMode) & SECMODE_USER) {
3254 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3255 bcc_ptr++; /* skip password */
3256 } else {
3257 pSMB->PasswordLength = cpu_to_le16(CIFS_SESSION_KEY_SIZE);
3258 /* BB FIXME add code to fail this if NTLMv2 or Kerberos
3259 specified as required (when that support is added to
3260 the vfs in the future) as only NTLM or the much
3261 weaker LANMAN (which we do not send) is accepted
3262 by Samba (not sure whether other servers allow
3263 NTLMv2 password here) */
3264 SMBNTencrypt(ses->password,
3265 ses->server->cryptKey,
3266 bcc_ptr);
3267
3268 bcc_ptr += CIFS_SESSION_KEY_SIZE;
3269 *bcc_ptr = 0;
3270 bcc_ptr++; /* align */
3271 }
3208 3272
3209 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 3273 if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
3210 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 3274 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
@@ -3222,7 +3286,6 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3222 bcc_ptr += 2 * length; /* convert num of 16 bit words to bytes */ 3286 bcc_ptr += 2 * length; /* convert num of 16 bit words to bytes */
3223 bcc_ptr += 2; /* skip trailing null */ 3287 bcc_ptr += 2; /* skip trailing null */
3224 } else { /* ASCII */ 3288 } else { /* ASCII */
3225
3226 strcpy(bcc_ptr, tree); 3289 strcpy(bcc_ptr, tree);
3227 bcc_ptr += strlen(tree) + 1; 3290 bcc_ptr += strlen(tree) + 1;
3228 } 3291 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 32cc96cafa3e..fed55e3c53df 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * vfs operations that deal with dentries 4 * vfs operations that deal with dentries
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2003 6 * Copyright (C) International Business Machines Corp., 2002,2005
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
9 * This library is free software; you can redistribute it and/or modify 9 * This library is free software; you can redistribute it and/or modify
@@ -200,8 +200,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
200 (oplock & CIFS_CREATE_ACTION)) 200 (oplock & CIFS_CREATE_ACTION))
201 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 201 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
202 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 202 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
203 (__u64)current->euid, 203 (__u64)current->fsuid,
204 (__u64)current->egid, 204 (__u64)current->fsgid,
205 0 /* dev */, 205 0 /* dev */,
206 cifs_sb->local_nls, 206 cifs_sb->local_nls,
207 cifs_sb->mnt_cifs_flags & 207 cifs_sb->mnt_cifs_flags &
@@ -325,7 +325,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
325 else if (pTcon->ses->capabilities & CAP_UNIX) { 325 else if (pTcon->ses->capabilities & CAP_UNIX) {
326 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 326 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
327 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, 327 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
328 mode,(__u64)current->euid,(__u64)current->egid, 328 mode,(__u64)current->fsuid,(__u64)current->fsgid,
329 device_number, cifs_sb->local_nls, 329 device_number, cifs_sb->local_nls,
330 cifs_sb->mnt_cifs_flags & 330 cifs_sb->mnt_cifs_flags &
331 CIFS_MOUNT_MAP_SPECIAL_CHR); 331 CIFS_MOUNT_MAP_SPECIAL_CHR);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 14a1c72ced92..77c990f0cb98 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -127,8 +127,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
127 if (file->f_dentry->d_inode->i_mapping) { 127 if (file->f_dentry->d_inode->i_mapping) {
128 /* BB no need to lock inode until after invalidate 128 /* BB no need to lock inode until after invalidate
129 since namei code should already have it locked? */ 129 since namei code should already have it locked? */
130 filemap_fdatawrite(file->f_dentry->d_inode->i_mapping); 130 filemap_write_and_wait(file->f_dentry->d_inode->i_mapping);
131 filemap_fdatawait(file->f_dentry->d_inode->i_mapping);
132 } 131 }
133 cFYI(1, ("invalidating remote inode since open detected it " 132 cFYI(1, ("invalidating remote inode since open detected it "
134 "changed")); 133 "changed"));
@@ -419,8 +418,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
419 pCifsInode = CIFS_I(inode); 418 pCifsInode = CIFS_I(inode);
420 if (pCifsInode) { 419 if (pCifsInode) {
421 if (can_flush) { 420 if (can_flush) {
422 filemap_fdatawrite(inode->i_mapping); 421 filemap_write_and_wait(inode->i_mapping);
423 filemap_fdatawait(inode->i_mapping);
424 /* temporarily disable caching while we 422 /* temporarily disable caching while we
425 go to server to get inode info */ 423 go to server to get inode info */
426 pCifsInode->clientCanCacheAll = FALSE; 424 pCifsInode->clientCanCacheAll = FALSE;
@@ -555,13 +553,13 @@ int cifs_closedir(struct inode *inode, struct file *file)
555 } 553 }
556 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; 554 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
557 if (ptmp) { 555 if (ptmp) {
558 /* BB removeme BB */ cFYI(1, ("freeing smb buf in srch struct in closedir")); 556 cFYI(1, ("closedir free smb buf in srch struct"));
559 pCFileStruct->srch_inf.ntwrk_buf_start = NULL; 557 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
560 cifs_buf_release(ptmp); 558 cifs_buf_release(ptmp);
561 } 559 }
562 ptmp = pCFileStruct->search_resume_name; 560 ptmp = pCFileStruct->search_resume_name;
563 if (ptmp) { 561 if (ptmp) {
564 /* BB removeme BB */ cFYI(1, ("freeing resume name in closedir")); 562 cFYI(1, ("closedir free resume name"));
565 pCFileStruct->search_resume_name = NULL; 563 pCFileStruct->search_resume_name = NULL;
566 kfree(ptmp); 564 kfree(ptmp);
567 } 565 }
@@ -870,10 +868,9 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
870 if (rc != 0) 868 if (rc != 0)
871 break; 869 break;
872 } 870 }
873#ifdef CONFIG_CIFS_EXPERIMENTAL
874 /* BB FIXME We can not sign across two buffers yet */ 871 /* BB FIXME We can not sign across two buffers yet */
875 if((experimEnabled) && ((pTcon->ses->server->secMode & 872 if((pTcon->ses->server->secMode &
876 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0)) { 873 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0) {
877 struct kvec iov[2]; 874 struct kvec iov[2];
878 unsigned int len; 875 unsigned int len;
879 876
@@ -889,7 +886,6 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
889 iov, 1, long_op); 886 iov, 1, long_op);
890 } else 887 } else
891 /* BB FIXME fixup indentation of line below */ 888 /* BB FIXME fixup indentation of line below */
892#endif
893 rc = CIFSSMBWrite(xid, pTcon, 889 rc = CIFSSMBWrite(xid, pTcon,
894 open_file->netfid, 890 open_file->netfid,
895 min_t(const int, cifs_sb->wsize, 891 min_t(const int, cifs_sb->wsize,
@@ -1026,7 +1022,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1026 return rc; 1022 return rc;
1027} 1023}
1028 1024
1029#ifdef CONFIG_CIFS_EXPERIMENTAL
1030static int cifs_writepages(struct address_space *mapping, 1025static int cifs_writepages(struct address_space *mapping,
1031 struct writeback_control *wbc) 1026 struct writeback_control *wbc)
1032{ 1027{
@@ -1229,7 +1224,6 @@ retry:
1229 1224
1230 return rc; 1225 return rc;
1231} 1226}
1232#endif
1233 1227
1234static int cifs_writepage(struct page* page, struct writeback_control *wbc) 1228static int cifs_writepage(struct page* page, struct writeback_control *wbc)
1235{ 1229{
@@ -1428,6 +1422,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1428 rc = -EAGAIN; 1422 rc = -EAGAIN;
1429 smb_read_data = NULL; 1423 smb_read_data = NULL;
1430 while (rc == -EAGAIN) { 1424 while (rc == -EAGAIN) {
1425 int buf_type = CIFS_NO_BUFFER;
1431 if ((open_file->invalidHandle) && 1426 if ((open_file->invalidHandle) &&
1432 (!open_file->closePend)) { 1427 (!open_file->closePend)) {
1433 rc = cifs_reopen_file(file->f_dentry->d_inode, 1428 rc = cifs_reopen_file(file->f_dentry->d_inode,
@@ -1436,20 +1431,22 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1436 break; 1431 break;
1437 } 1432 }
1438 rc = CIFSSMBRead(xid, pTcon, 1433 rc = CIFSSMBRead(xid, pTcon,
1439 open_file->netfid, 1434 open_file->netfid,
1440 current_read_size, *poffset, 1435 current_read_size, *poffset,
1441 &bytes_read, &smb_read_data); 1436 &bytes_read, &smb_read_data,
1437 &buf_type);
1442 pSMBr = (struct smb_com_read_rsp *)smb_read_data; 1438 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1443 if (copy_to_user(current_offset, 1439 if (copy_to_user(current_offset,
1444 smb_read_data + 4 /* RFC1001 hdr */ 1440 smb_read_data + 4 /* RFC1001 hdr */
1445 + le16_to_cpu(pSMBr->DataOffset), 1441 + le16_to_cpu(pSMBr->DataOffset),
1446 bytes_read)) { 1442 bytes_read)) {
1447 rc = -EFAULT; 1443 rc = -EFAULT;
1448 FreeXid(xid); 1444 }
1449 return rc;
1450 }
1451 if (smb_read_data) { 1445 if (smb_read_data) {
1452 cifs_buf_release(smb_read_data); 1446 if(buf_type == CIFS_SMALL_BUFFER)
1447 cifs_small_buf_release(smb_read_data);
1448 else if(buf_type == CIFS_LARGE_BUFFER)
1449 cifs_buf_release(smb_read_data);
1453 smb_read_data = NULL; 1450 smb_read_data = NULL;
1454 } 1451 }
1455 } 1452 }
@@ -1482,6 +1479,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1482 int xid; 1479 int xid;
1483 char *current_offset; 1480 char *current_offset;
1484 struct cifsFileInfo *open_file; 1481 struct cifsFileInfo *open_file;
1482 int buf_type = CIFS_NO_BUFFER;
1485 1483
1486 xid = GetXid(); 1484 xid = GetXid();
1487 cifs_sb = CIFS_SB(file->f_dentry->d_sb); 1485 cifs_sb = CIFS_SB(file->f_dentry->d_sb);
@@ -1518,9 +1516,10 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1518 break; 1516 break;
1519 } 1517 }
1520 rc = CIFSSMBRead(xid, pTcon, 1518 rc = CIFSSMBRead(xid, pTcon,
1521 open_file->netfid, 1519 open_file->netfid,
1522 current_read_size, *poffset, 1520 current_read_size, *poffset,
1523 &bytes_read, &current_offset); 1521 &bytes_read, &current_offset,
1522 &buf_type);
1524 } 1523 }
1525 if (rc || (bytes_read == 0)) { 1524 if (rc || (bytes_read == 0)) {
1526 if (total_read) { 1525 if (total_read) {
@@ -1618,6 +1617,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1618 struct smb_com_read_rsp *pSMBr; 1617 struct smb_com_read_rsp *pSMBr;
1619 struct pagevec lru_pvec; 1618 struct pagevec lru_pvec;
1620 struct cifsFileInfo *open_file; 1619 struct cifsFileInfo *open_file;
1620 int buf_type = CIFS_NO_BUFFER;
1621 1621
1622 xid = GetXid(); 1622 xid = GetXid();
1623 if (file->private_data == NULL) { 1623 if (file->private_data == NULL) {
@@ -1674,14 +1674,17 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1674 } 1674 }
1675 1675
1676 rc = CIFSSMBRead(xid, pTcon, 1676 rc = CIFSSMBRead(xid, pTcon,
1677 open_file->netfid, 1677 open_file->netfid,
1678 read_size, offset, 1678 read_size, offset,
1679 &bytes_read, &smb_read_data); 1679 &bytes_read, &smb_read_data,
1680 1680 &buf_type);
1681 /* BB more RC checks ? */ 1681 /* BB more RC checks ? */
1682 if (rc== -EAGAIN) { 1682 if (rc== -EAGAIN) {
1683 if (smb_read_data) { 1683 if (smb_read_data) {
1684 cifs_buf_release(smb_read_data); 1684 if(buf_type == CIFS_SMALL_BUFFER)
1685 cifs_small_buf_release(smb_read_data);
1686 else if(buf_type == CIFS_LARGE_BUFFER)
1687 cifs_buf_release(smb_read_data);
1685 smb_read_data = NULL; 1688 smb_read_data = NULL;
1686 } 1689 }
1687 } 1690 }
@@ -1738,7 +1741,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1738 break; 1741 break;
1739 } 1742 }
1740 if (smb_read_data) { 1743 if (smb_read_data) {
1741 cifs_buf_release(smb_read_data); 1744 if(buf_type == CIFS_SMALL_BUFFER)
1745 cifs_small_buf_release(smb_read_data);
1746 else if(buf_type == CIFS_LARGE_BUFFER)
1747 cifs_buf_release(smb_read_data);
1742 smb_read_data = NULL; 1748 smb_read_data = NULL;
1743 } 1749 }
1744 bytes_read = 0; 1750 bytes_read = 0;
@@ -1748,7 +1754,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1748 1754
1749/* need to free smb_read_data buf before exit */ 1755/* need to free smb_read_data buf before exit */
1750 if (smb_read_data) { 1756 if (smb_read_data) {
1751 cifs_buf_release(smb_read_data); 1757 if(buf_type == CIFS_SMALL_BUFFER)
1758 cifs_small_buf_release(smb_read_data);
1759 else if(buf_type == CIFS_LARGE_BUFFER)
1760 cifs_buf_release(smb_read_data);
1752 smb_read_data = NULL; 1761 smb_read_data = NULL;
1753 } 1762 }
1754 1763
@@ -1827,10 +1836,20 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode)
1827 open_file = find_writable_file(cifsInode); 1836 open_file = find_writable_file(cifsInode);
1828 1837
1829 if(open_file) { 1838 if(open_file) {
1839 struct cifs_sb_info *cifs_sb;
1840
1830 /* there is not actually a write pending so let 1841 /* there is not actually a write pending so let
1831 this handle go free and allow it to 1842 this handle go free and allow it to
1832 be closable if needed */ 1843 be closable if needed */
1833 atomic_dec(&open_file->wrtPending); 1844 atomic_dec(&open_file->wrtPending);
1845
1846 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
1847 if ( cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO ) {
1848 /* since no page cache to corrupt on directio
1849 we can change size safely */
1850 return 1;
1851 }
1852
1834 return 0; 1853 return 0;
1835 } else 1854 } else
1836 return 1; 1855 return 1;
@@ -1875,9 +1894,7 @@ struct address_space_operations cifs_addr_ops = {
1875 .readpage = cifs_readpage, 1894 .readpage = cifs_readpage,
1876 .readpages = cifs_readpages, 1895 .readpages = cifs_readpages,
1877 .writepage = cifs_writepage, 1896 .writepage = cifs_writepage,
1878#ifdef CONFIG_CIFS_EXPERIMENTAL
1879 .writepages = cifs_writepages, 1897 .writepages = cifs_writepages,
1880#endif
1881 .prepare_write = cifs_prepare_write, 1898 .prepare_write = cifs_prepare_write,
1882 .commit_write = cifs_commit_write, 1899 .commit_write = cifs_commit_write,
1883 .set_page_dirty = __set_page_dirty_nobuffers, 1900 .set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 411c1f7f84da..59359911f481 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -229,11 +229,12 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
229 cifs_sb->mnt_cifs_flags & 229 cifs_sb->mnt_cifs_flags &
230 CIFS_MOUNT_MAP_SPECIAL_CHR); 230 CIFS_MOUNT_MAP_SPECIAL_CHR);
231 if (rc==0) { 231 if (rc==0) {
232 int buf_type = CIFS_NO_BUFFER;
232 /* Read header */ 233 /* Read header */
233 rc = CIFSSMBRead(xid, pTcon, 234 rc = CIFSSMBRead(xid, pTcon,
234 netfid, 235 netfid,
235 24 /* length */, 0 /* offset */, 236 24 /* length */, 0 /* offset */,
236 &bytes_read, &pbuf); 237 &bytes_read, &pbuf, &buf_type);
237 if((rc == 0) && (bytes_read >= 8)) { 238 if((rc == 0) && (bytes_read >= 8)) {
238 if(memcmp("IntxBLK", pbuf, 8) == 0) { 239 if(memcmp("IntxBLK", pbuf, 8) == 0) {
239 cFYI(1,("Block device")); 240 cFYI(1,("Block device"));
@@ -267,7 +268,7 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
267 } else { 268 } else {
268 inode->i_mode |= S_IFREG; /* then it is a file */ 269 inode->i_mode |= S_IFREG; /* then it is a file */
269 rc = -EOPNOTSUPP; /* or some unknown SFU type */ 270 rc = -EOPNOTSUPP; /* or some unknown SFU type */
270 } 271 }
271 CIFSSMBClose(xid, pTcon, netfid); 272 CIFSSMBClose(xid, pTcon, netfid);
272 } 273 }
273 return rc; 274 return rc;
@@ -750,8 +751,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
750 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 751 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
751 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 752 CIFSSMBUnixSetPerms(xid, pTcon, full_path,
752 mode, 753 mode,
753 (__u64)current->euid, 754 (__u64)current->fsuid,
754 (__u64)current->egid, 755 (__u64)current->fsgid,
755 0 /* dev_t */, 756 0 /* dev_t */,
756 cifs_sb->local_nls, 757 cifs_sb->local_nls,
757 cifs_sb->mnt_cifs_flags & 758 cifs_sb->mnt_cifs_flags &
@@ -1040,9 +1041,9 @@ int cifs_revalidate(struct dentry *direntry)
1040 } 1041 }
1041 1042
1042 /* can not grab this sem since kernel filesys locking documentation 1043 /* can not grab this sem since kernel filesys locking documentation
1043 indicates i_sem may be taken by the kernel on lookup and rename 1044 indicates i_mutex may be taken by the kernel on lookup and rename
1044 which could deadlock if we grab the i_sem here as well */ 1045 which could deadlock if we grab the i_mutex here as well */
1045/* down(&direntry->d_inode->i_sem);*/ 1046/* mutex_lock(&direntry->d_inode->i_mutex);*/
1046 /* need to write out dirty pages here */ 1047 /* need to write out dirty pages here */
1047 if (direntry->d_inode->i_mapping) { 1048 if (direntry->d_inode->i_mapping) {
1048 /* do we need to lock inode until after invalidate completes 1049 /* do we need to lock inode until after invalidate completes
@@ -1066,7 +1067,7 @@ int cifs_revalidate(struct dentry *direntry)
1066 } 1067 }
1067 } 1068 }
1068 } 1069 }
1069/* up(&direntry->d_inode->i_sem); */ 1070/* mutex_unlock(&direntry->d_inode->i_mutex); */
1070 1071
1071 kfree(full_path); 1072 kfree(full_path);
1072 FreeXid(xid); 1073 FreeXid(xid);
@@ -1148,8 +1149,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1148 /* BB check if we need to refresh inode from server now ? BB */ 1149 /* BB check if we need to refresh inode from server now ? BB */
1149 1150
1150 /* need to flush data before changing file size on server */ 1151 /* need to flush data before changing file size on server */
1151 filemap_fdatawrite(direntry->d_inode->i_mapping); 1152 filemap_write_and_wait(direntry->d_inode->i_mapping);
1152 filemap_fdatawait(direntry->d_inode->i_mapping);
1153 1153
1154 if (attrs->ia_valid & ATTR_SIZE) { 1154 if (attrs->ia_valid & ATTR_SIZE) {
1155 /* To avoid spurious oplock breaks from server, in the case of 1155 /* To avoid spurious oplock breaks from server, in the case of
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 94baf6c8ecbd..812c6bb0fe38 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/misc.c 2 * fs/cifs/misc.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2004 4 * Copyright (C) International Business Machines Corp., 2002,2005
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -161,6 +161,9 @@ cifs_buf_get(void)
161 if (ret_buf) { 161 if (ret_buf) {
162 memset(ret_buf, 0, sizeof(struct smb_hdr) + 3); 162 memset(ret_buf, 0, sizeof(struct smb_hdr) + 3);
163 atomic_inc(&bufAllocCount); 163 atomic_inc(&bufAllocCount);
164#ifdef CONFIG_CIFS_STATS2
165 atomic_inc(&totBufAllocCount);
166#endif /* CONFIG_CIFS_STATS2 */
164 } 167 }
165 168
166 return ret_buf; 169 return ret_buf;
@@ -195,6 +198,10 @@ cifs_small_buf_get(void)
195 /* No need to clear memory here, cleared in header assemble */ 198 /* No need to clear memory here, cleared in header assemble */
196 /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ 199 /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
197 atomic_inc(&smBufAllocCount); 200 atomic_inc(&smBufAllocCount);
201#ifdef CONFIG_CIFS_STATS2
202 atomic_inc(&totSmBufAllocCount);
203#endif /* CONFIG_CIFS_STATS2 */
204
198 } 205 }
199 return ret_buf; 206 return ret_buf;
200} 207}
@@ -292,7 +299,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
292 struct cifsSesInfo * ses; 299 struct cifsSesInfo * ses;
293 char *temp = (char *) buffer; 300 char *temp = (char *) buffer;
294 301
295 memset(temp,0,MAX_CIFS_HDR_SIZE); 302 memset(temp,0,256); /* bigger than MAX_CIFS_HDR_SIZE */
296 303
297 buffer->smb_buf_length = 304 buffer->smb_buf_length =
298 (2 * word_count) + sizeof (struct smb_hdr) - 305 (2 * word_count) + sizeof (struct smb_hdr) -
@@ -348,12 +355,12 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
348 /* BB Add support for establishing new tCon and SMB Session */ 355 /* BB Add support for establishing new tCon and SMB Session */
349 /* with userid/password pairs found on the smb session */ 356 /* with userid/password pairs found on the smb session */
350 /* for other target tcp/ip addresses BB */ 357 /* for other target tcp/ip addresses BB */
351 if(current->uid != treeCon->ses->linux_uid) { 358 if(current->fsuid != treeCon->ses->linux_uid) {
352 cFYI(1,("Multiuser mode and UID did not match tcon uid ")); 359 cFYI(1,("Multiuser mode and UID did not match tcon uid"));
353 read_lock(&GlobalSMBSeslock); 360 read_lock(&GlobalSMBSeslock);
354 list_for_each(temp_item, &GlobalSMBSessionList) { 361 list_for_each(temp_item, &GlobalSMBSessionList) {
355 ses = list_entry(temp_item, struct cifsSesInfo, cifsSessionList); 362 ses = list_entry(temp_item, struct cifsSesInfo, cifsSessionList);
356 if(ses->linux_uid == current->uid) { 363 if(ses->linux_uid == current->fsuid) {
357 if(ses->server == treeCon->ses->server) { 364 if(ses->server == treeCon->ses->server) {
358 cFYI(1,("found matching uid substitute right smb_uid")); 365 cFYI(1,("found matching uid substitute right smb_uid"));
359 buffer->Uid = ses->Suid; 366 buffer->Uid = ses->Suid;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9bdaaecae36f..288cc048d37f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -214,8 +214,7 @@ static void fill_in_inode(struct inode *tmp_inode,
214 tmp_inode->i_fop = &cifs_file_nobrl_ops; 214 tmp_inode->i_fop = &cifs_file_nobrl_ops;
215 else 215 else
216 tmp_inode->i_fop = &cifs_file_ops; 216 tmp_inode->i_fop = &cifs_file_ops;
217 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 217
218 tmp_inode->i_fop->lock = NULL;
219 tmp_inode->i_data.a_ops = &cifs_addr_ops; 218 tmp_inode->i_data.a_ops = &cifs_addr_ops;
220 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 219 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
221 (cifs_sb->tcon->ses->server->maxBuf < 220 (cifs_sb->tcon->ses->server->maxBuf <
@@ -327,12 +326,18 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
327 if (S_ISREG(tmp_inode->i_mode)) { 326 if (S_ISREG(tmp_inode->i_mode)) {
328 cFYI(1, ("File inode")); 327 cFYI(1, ("File inode"));
329 tmp_inode->i_op = &cifs_file_inode_ops; 328 tmp_inode->i_op = &cifs_file_inode_ops;
330 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) 329
331 tmp_inode->i_fop = &cifs_file_direct_ops; 330 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
331 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
332 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
333 else
334 tmp_inode->i_fop = &cifs_file_direct_ops;
335
336 } else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
337 tmp_inode->i_fop = &cifs_file_nobrl_ops;
332 else 338 else
333 tmp_inode->i_fop = &cifs_file_ops; 339 tmp_inode->i_fop = &cifs_file_ops;
334 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 340
335 tmp_inode->i_fop->lock = NULL;
336 tmp_inode->i_data.a_ops = &cifs_addr_ops; 341 tmp_inode->i_data.a_ops = &cifs_addr_ops;
337 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) && 342 if((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
338 (cifs_sb->tcon->ses->server->maxBuf < 343 (cifs_sb->tcon->ses->server->maxBuf <
diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h
index 9222033cad8e..aede606132aa 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/cifs/rfc1002pdu.h
@@ -24,11 +24,11 @@
24/* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */ 24/* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */
25 25
26 /* RFC 1002 session packet types */ 26 /* RFC 1002 session packet types */
27#define RFC1002_SESSION_MESASAGE 0x00 27#define RFC1002_SESSION_MESSAGE 0x00
28#define RFC1002_SESSION_REQUEST 0x81 28#define RFC1002_SESSION_REQUEST 0x81
29#define RFC1002_POSITIVE_SESSION_RESPONSE 0x82 29#define RFC1002_POSITIVE_SESSION_RESPONSE 0x82
30#define RFC1002_NEGATIVE_SESSION_RESPONSE 0x83 30#define RFC1002_NEGATIVE_SESSION_RESPONSE 0x83
31#define RFC1002_RETARGET_SESSION_RESPONSE 0x83 31#define RFC1002_RETARGET_SESSION_RESPONSE 0x84
32#define RFC1002_SESSION_KEEP_ALIVE 0x85 32#define RFC1002_SESSION_KEEP_ALIVE 0x85
33 33
34 /* RFC 1002 flags (only one defined */ 34 /* RFC 1002 flags (only one defined */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index f8871196098c..7b98792150ea 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -206,7 +206,6 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
206 return rc; 206 return rc;
207} 207}
208 208
209#ifdef CONFIG_CIFS_EXPERIMENTAL
210static int 209static int
211smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, 210smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
212 struct sockaddr *sin) 211 struct sockaddr *sin)
@@ -299,7 +298,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
299 298
300int 299int
301SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, 300SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
302 struct kvec *iov, int n_vec, int *pbytes_returned, 301 struct kvec *iov, int n_vec, int * pRespBufType /* ret */,
303 const int long_op) 302 const int long_op)
304{ 303{
305 int rc = 0; 304 int rc = 0;
@@ -307,6 +306,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
307 unsigned long timeout; 306 unsigned long timeout;
308 struct mid_q_entry *midQ; 307 struct mid_q_entry *midQ;
309 struct smb_hdr *in_buf = iov[0].iov_base; 308 struct smb_hdr *in_buf = iov[0].iov_base;
309
310 *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */
310 311
311 if (ses == NULL) { 312 if (ses == NULL) {
312 cERROR(1,("Null smb session")); 313 cERROR(1,("Null smb session"));
@@ -392,8 +393,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
392 return -ENOMEM; 393 return -ENOMEM;
393 } 394 }
394 395
395/* BB FIXME */ 396 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
396/* rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); */
397 397
398 midQ->midState = MID_REQUEST_SUBMITTED; 398 midQ->midState = MID_REQUEST_SUBMITTED;
399#ifdef CONFIG_CIFS_STATS2 399#ifdef CONFIG_CIFS_STATS2
@@ -489,21 +489,23 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
489 receive_len, xid)); 489 receive_len, xid));
490 rc = -EIO; 490 rc = -EIO;
491 } else { /* rcvd frame is ok */ 491 } else { /* rcvd frame is ok */
492
493 if (midQ->resp_buf && 492 if (midQ->resp_buf &&
494 (midQ->midState == MID_RESPONSE_RECEIVED)) { 493 (midQ->midState == MID_RESPONSE_RECEIVED)) {
495 in_buf->smb_buf_length = receive_len;
496 /* BB verify that length would not overrun small buf */
497 memcpy((char *)in_buf + 4,
498 (char *)midQ->resp_buf + 4,
499 receive_len);
500 494
501 dump_smb(in_buf, 80); 495 iov[0].iov_base = (char *)midQ->resp_buf;
496 if(midQ->largeBuf)
497 *pRespBufType = CIFS_LARGE_BUFFER;
498 else
499 *pRespBufType = CIFS_SMALL_BUFFER;
500 iov[0].iov_len = receive_len + 4;
501 iov[1].iov_len = 0;
502
503 dump_smb(midQ->resp_buf, 80);
502 /* convert the length into a more usable form */ 504 /* convert the length into a more usable form */
503 if((receive_len > 24) && 505 if((receive_len > 24) &&
504 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 506 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
505 SECMODE_SIGN_ENABLED))) { 507 SECMODE_SIGN_ENABLED))) {
506 rc = cifs_verify_signature(in_buf, 508 rc = cifs_verify_signature(midQ->resp_buf,
507 ses->server->mac_signing_key, 509 ses->server->mac_signing_key,
508 midQ->sequence_number+1); 510 midQ->sequence_number+1);
509 if(rc) { 511 if(rc) {
@@ -512,18 +514,19 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
512 } 514 }
513 } 515 }
514 516
515 *pbytes_returned = in_buf->smb_buf_length;
516
517 /* BB special case reconnect tid and uid here? */ 517 /* BB special case reconnect tid and uid here? */
518 /* BB special case Errbadpassword and pwdexpired here */ 518 /* BB special case Errbadpassword and pwdexpired here */
519 rc = map_smb_to_linux_error(in_buf); 519 rc = map_smb_to_linux_error(midQ->resp_buf);
520 520
521 /* convert ByteCount if necessary */ 521 /* convert ByteCount if necessary */
522 if (receive_len >= 522 if (receive_len >=
523 sizeof (struct smb_hdr) - 523 sizeof (struct smb_hdr) -
524 4 /* do not count RFC1001 header */ + 524 4 /* do not count RFC1001 header */ +
525 (2 * in_buf->WordCount) + 2 /* bcc */ ) 525 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
526 BCC(in_buf) = le16_to_cpu(BCC_LE(in_buf)); 526 BCC(midQ->resp_buf) =
527 le16_to_cpu(BCC_LE(midQ->resp_buf));
528 midQ->resp_buf = NULL; /* mark it so will not be freed
529 by DeleteMidQEntry */
527 } else { 530 } else {
528 rc = -EIO; 531 rc = -EIO;
529 cFYI(1,("Bad MID state?")); 532 cFYI(1,("Bad MID state?"));
@@ -549,7 +552,6 @@ out_unlock2:
549 552
550 return rc; 553 return rc;
551} 554}
552#endif /* CIFS_EXPERIMENTAL */
553 555
554int 556int
555SendReceive(const unsigned int xid, struct cifsSesInfo *ses, 557SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
@@ -790,7 +792,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
790 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); 792 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
791 } else { 793 } else {
792 rc = -EIO; 794 rc = -EIO;
793 cERROR(1,("Bad MID state? ")); 795 cERROR(1,("Bad MID state?"));
794 } 796 }
795 } 797 }
796cifs_no_response_exit: 798cifs_no_response_exit:
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index f375f87c7dbd..777e3363c2a4 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -254,7 +254,8 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
254 rc = CIFSSMBQueryEA(xid,pTcon,full_path,ea_name,ea_value, 254 rc = CIFSSMBQueryEA(xid,pTcon,full_path,ea_name,ea_value,
255 buf_size, cifs_sb->local_nls, 255 buf_size, cifs_sb->local_nls,
256 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 256 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
257 } else if(strncmp(ea_name,POSIX_ACL_XATTR_ACCESS,strlen(POSIX_ACL_XATTR_ACCESS)) == 0) { 257 } else if(strncmp(ea_name,POSIX_ACL_XATTR_ACCESS,
258 strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
258#ifdef CONFIG_CIFS_POSIX 259#ifdef CONFIG_CIFS_POSIX
259 if(sb->s_flags & MS_POSIXACL) 260 if(sb->s_flags & MS_POSIXACL)
260 rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, 261 rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
@@ -262,10 +263,27 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
262 cifs_sb->local_nls, 263 cifs_sb->local_nls,
263 cifs_sb->mnt_cifs_flags & 264 cifs_sb->mnt_cifs_flags &
264 CIFS_MOUNT_MAP_SPECIAL_CHR); 265 CIFS_MOUNT_MAP_SPECIAL_CHR);
266/* else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
267 __u16 fid;
268 int oplock = FALSE;
269 rc = CIFSSMBOpen(xid, pTcon, full_path,
270 FILE_OPEN, GENERIC_READ, 0, &fid,
271 &oplock, NULL, cifs_sb->local_nls,
272 cifs_sb->mnt_cifs_flags &
273 CIFS_MOUNT_MAP_SPECIAL_CHR);
274 if(rc == 0) {
275 rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
276 ea_value, buf_size,
277 ACL_TYPE_ACCESS);
278 CIFSSMBClose(xid, pTcon, fid)
279 }
280 } */ /* BB enable after fixing up return data */
281
265#else 282#else
266 cFYI(1,("query POSIX ACL not supported yet")); 283 cFYI(1,("query POSIX ACL not supported yet"));
267#endif /* CONFIG_CIFS_POSIX */ 284#endif /* CONFIG_CIFS_POSIX */
268 } else if(strncmp(ea_name,POSIX_ACL_XATTR_DEFAULT,strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { 285 } else if(strncmp(ea_name,POSIX_ACL_XATTR_DEFAULT,
286 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
269#ifdef CONFIG_CIFS_POSIX 287#ifdef CONFIG_CIFS_POSIX
270 if(sb->s_flags & MS_POSIXACL) 288 if(sb->s_flags & MS_POSIXACL)
271 rc = CIFSSMBGetPosixACL(xid, pTcon, full_path, 289 rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 80072fd9b7fa..c607d923350a 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
93 spin_lock(&dcache_lock); 93 spin_lock(&dcache_lock);
94 list_for_each(child, &parent->d_subdirs) 94 list_for_each(child, &parent->d_subdirs)
95 { 95 {
96 de = list_entry(child, struct dentry, d_child); 96 de = list_entry(child, struct dentry, d_u.d_child);
97 /* don't know what to do with negative dentries */ 97 /* don't know what to do with negative dentries */
98 if ( ! de->d_inode ) 98 if ( ! de->d_inode )
99 continue; 99 continue;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2391766e9c7c..8f1a517f8b4e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -453,7 +453,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
453 coda_vfs_stat.readdir++; 453 coda_vfs_stat.readdir++;
454 454
455 host_inode = host_file->f_dentry->d_inode; 455 host_inode = host_file->f_dentry->d_inode;
456 down(&host_inode->i_sem); 456 mutex_lock(&host_inode->i_mutex);
457 host_file->f_pos = coda_file->f_pos; 457 host_file->f_pos = coda_file->f_pos;
458 458
459 if (!host_file->f_op->readdir) { 459 if (!host_file->f_op->readdir) {
@@ -475,7 +475,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
475 } 475 }
476out: 476out:
477 coda_file->f_pos = host_file->f_pos; 477 coda_file->f_pos = host_file->f_pos;
478 up(&host_inode->i_sem); 478 mutex_unlock(&host_inode->i_mutex);
479 479
480 return ret; 480 return ret;
481} 481}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index e6bc022568f3..30b4630bd735 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -77,14 +77,14 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
77 return -EINVAL; 77 return -EINVAL;
78 78
79 host_inode = host_file->f_dentry->d_inode; 79 host_inode = host_file->f_dentry->d_inode;
80 down(&coda_inode->i_sem); 80 mutex_lock(&coda_inode->i_mutex);
81 81
82 ret = host_file->f_op->write(host_file, buf, count, ppos); 82 ret = host_file->f_op->write(host_file, buf, count, ppos);
83 83
84 coda_inode->i_size = host_inode->i_size; 84 coda_inode->i_size = host_inode->i_size;
85 coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; 85 coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
86 coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; 86 coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
87 up(&coda_inode->i_sem); 87 mutex_unlock(&coda_inode->i_mutex);
88 88
89 return ret; 89 return ret;
90} 90}
@@ -272,9 +272,9 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
272 if (host_file->f_op && host_file->f_op->fsync) { 272 if (host_file->f_op && host_file->f_op->fsync) {
273 host_dentry = host_file->f_dentry; 273 host_dentry = host_file->f_dentry;
274 host_inode = host_dentry->d_inode; 274 host_inode = host_dentry->d_inode;
275 down(&host_inode->i_sem); 275 mutex_lock(&host_inode->i_mutex);
276 err = host_file->f_op->fsync(host_file, host_dentry, datasync); 276 err = host_file->f_op->fsync(host_file, host_dentry, datasync);
277 up(&host_inode->i_sem); 277 mutex_unlock(&host_inode->i_mutex);
278 } 278 }
279 279
280 if ( !err && !datasync ) { 280 if ( !err && !datasync ) {
diff --git a/fs/compat.c b/fs/compat.c
index 55ac0324aaf1..ff0bafcff720 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -53,6 +53,8 @@
53#include <asm/mmu_context.h> 53#include <asm/mmu_context.h>
54#include <asm/ioctls.h> 54#include <asm/ioctls.h>
55 55
56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
57
56/* 58/*
57 * Not all architectures have sys_utime, so implement this in terms 59 * Not all architectures have sys_utime, so implement this in terms
58 * of sys_utimes. 60 * of sys_utimes.
@@ -68,10 +70,10 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __
68 tv[0].tv_usec = 0; 70 tv[0].tv_usec = 0;
69 tv[1].tv_usec = 0; 71 tv[1].tv_usec = 0;
70 } 72 }
71 return do_utimes(filename, t ? tv : NULL); 73 return do_utimes(AT_FDCWD, filename, t ? tv : NULL);
72} 74}
73 75
74asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t) 76asmlinkage long compat_sys_futimesat(int dfd, char __user *filename, struct compat_timeval __user *t)
75{ 77{
76 struct timeval tv[2]; 78 struct timeval tv[2];
77 79
@@ -82,14 +84,19 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _
82 get_user(tv[1].tv_usec, &t[1].tv_usec)) 84 get_user(tv[1].tv_usec, &t[1].tv_usec))
83 return -EFAULT; 85 return -EFAULT;
84 } 86 }
85 return do_utimes(filename, t ? tv : NULL); 87 return do_utimes(dfd, filename, t ? tv : NULL);
88}
89
90asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t)
91{
92 return compat_sys_futimesat(AT_FDCWD, filename, t);
86} 93}
87 94
88asmlinkage long compat_sys_newstat(char __user * filename, 95asmlinkage long compat_sys_newstat(char __user * filename,
89 struct compat_stat __user *statbuf) 96 struct compat_stat __user *statbuf)
90{ 97{
91 struct kstat stat; 98 struct kstat stat;
92 int error = vfs_stat(filename, &stat); 99 int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
93 100
94 if (!error) 101 if (!error)
95 error = cp_compat_stat(&stat, statbuf); 102 error = cp_compat_stat(&stat, statbuf);
@@ -100,10 +107,31 @@ asmlinkage long compat_sys_newlstat(char __user * filename,
100 struct compat_stat __user *statbuf) 107 struct compat_stat __user *statbuf)
101{ 108{
102 struct kstat stat; 109 struct kstat stat;
103 int error = vfs_lstat(filename, &stat); 110 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
111
112 if (!error)
113 error = cp_compat_stat(&stat, statbuf);
114 return error;
115}
116
117asmlinkage long compat_sys_newfstatat(int dfd, char __user *filename,
118 struct compat_stat __user *statbuf, int flag)
119{
120 struct kstat stat;
121 int error = -EINVAL;
122
123 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
124 goto out;
125
126 if (flag & AT_SYMLINK_NOFOLLOW)
127 error = vfs_lstat_fd(dfd, filename, &stat);
128 else
129 error = vfs_stat_fd(dfd, filename, &stat);
104 130
105 if (!error) 131 if (!error)
106 error = cp_compat_stat(&stat, statbuf); 132 error = cp_compat_stat(&stat, statbuf);
133
134out:
107 return error; 135 return error;
108} 136}
109 137
@@ -494,9 +522,21 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
494 ret = sys_fcntl(fd, cmd, (unsigned long)&f); 522 ret = sys_fcntl(fd, cmd, (unsigned long)&f);
495 set_fs(old_fs); 523 set_fs(old_fs);
496 if (cmd == F_GETLK && ret == 0) { 524 if (cmd == F_GETLK && ret == 0) {
497 if ((f.l_start >= COMPAT_OFF_T_MAX) || 525 /* GETLK was successful and we need to return the data...
498 ((f.l_start + f.l_len) > COMPAT_OFF_T_MAX)) 526 * but it needs to fit in the compat structure.
527 * l_start shouldn't be too big, unless the original
528 * start + end is greater than COMPAT_OFF_T_MAX, in which
529 * case the app was asking for trouble, so we return
530 * -EOVERFLOW in that case.
531 * l_len could be too big, in which case we just truncate it,
532 * and only allow the app to see that part of the conflicting
533 * lock that might make sense to it anyway
534 */
535
536 if (f.l_start > COMPAT_OFF_T_MAX)
499 ret = -EOVERFLOW; 537 ret = -EOVERFLOW;
538 if (f.l_len > COMPAT_OFF_T_MAX)
539 f.l_len = COMPAT_OFF_T_MAX;
500 if (ret == 0) 540 if (ret == 0)
501 ret = put_compat_flock(&f, compat_ptr(arg)); 541 ret = put_compat_flock(&f, compat_ptr(arg));
502 } 542 }
@@ -515,9 +555,11 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
515 (unsigned long)&f); 555 (unsigned long)&f);
516 set_fs(old_fs); 556 set_fs(old_fs);
517 if (cmd == F_GETLK64 && ret == 0) { 557 if (cmd == F_GETLK64 && ret == 0) {
518 if ((f.l_start >= COMPAT_LOFF_T_MAX) || 558 /* need to return lock information - see above for commentary */
519 ((f.l_start + f.l_len) > COMPAT_LOFF_T_MAX)) 559 if (f.l_start > COMPAT_LOFF_T_MAX)
520 ret = -EOVERFLOW; 560 ret = -EOVERFLOW;
561 if (f.l_len > COMPAT_LOFF_T_MAX)
562 f.l_len = COMPAT_LOFF_T_MAX;
521 if (ret == 0) 563 if (ret == 0)
522 ret = put_compat_flock64(&f, compat_ptr(arg)); 564 ret = put_compat_flock64(&f, compat_ptr(arg));
523 } 565 }
@@ -1276,7 +1318,17 @@ out:
1276asmlinkage long 1318asmlinkage long
1277compat_sys_open(const char __user *filename, int flags, int mode) 1319compat_sys_open(const char __user *filename, int flags, int mode)
1278{ 1320{
1279 return do_sys_open(filename, flags, mode); 1321 return do_sys_open(AT_FDCWD, filename, flags, mode);
1322}
1323
1324/*
1325 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
1326 * O_LARGEFILE flag.
1327 */
1328asmlinkage long
1329compat_sys_openat(int dfd, const char __user *filename, int flags, int mode)
1330{
1331 return do_sys_open(dfd, filename, flags, mode);
1280} 1332}
1281 1333
1282/* 1334/*
@@ -1523,7 +1575,7 @@ out_ret:
1523 * Ooo, nasty. We need here to frob 32-bit unsigned longs to 1575 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1524 * 64-bit unsigned longs. 1576 * 64-bit unsigned longs.
1525 */ 1577 */
1526static inline 1578static
1527int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, 1579int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1528 unsigned long *fdset) 1580 unsigned long *fdset)
1529{ 1581{
@@ -1556,7 +1608,7 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1556 return 0; 1608 return 0;
1557} 1609}
1558 1610
1559static inline 1611static
1560void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, 1612void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1561 unsigned long *fdset) 1613 unsigned long *fdset)
1562{ 1614{
@@ -1607,36 +1659,14 @@ static void select_bits_free(void *bits, int size)
1607#define MAX_SELECT_SECONDS \ 1659#define MAX_SELECT_SECONDS \
1608 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 1660 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1609 1661
1610asmlinkage long 1662int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1611compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, 1663 compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout)
1612 compat_ulong_t __user *exp, struct compat_timeval __user *tvp)
1613{ 1664{
1614 fd_set_bits fds; 1665 fd_set_bits fds;
1615 char *bits; 1666 char *bits;
1616 long timeout;
1617 int size, max_fdset, ret = -EINVAL; 1667 int size, max_fdset, ret = -EINVAL;
1618 struct fdtable *fdt; 1668 struct fdtable *fdt;
1619 1669
1620 timeout = MAX_SCHEDULE_TIMEOUT;
1621 if (tvp) {
1622 time_t sec, usec;
1623
1624 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
1625 || __get_user(sec, &tvp->tv_sec)
1626 || __get_user(usec, &tvp->tv_usec)) {
1627 ret = -EFAULT;
1628 goto out_nofds;
1629 }
1630
1631 if (sec < 0 || usec < 0)
1632 goto out_nofds;
1633
1634 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
1635 timeout = ROUND_UP(usec, 1000000/HZ);
1636 timeout += sec * (unsigned long) HZ;
1637 }
1638 }
1639
1640 if (n < 0) 1670 if (n < 0)
1641 goto out_nofds; 1671 goto out_nofds;
1642 1672
@@ -1673,19 +1703,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
1673 zero_fd_set(n, fds.res_out); 1703 zero_fd_set(n, fds.res_out);
1674 zero_fd_set(n, fds.res_ex); 1704 zero_fd_set(n, fds.res_ex);
1675 1705
1676 ret = do_select(n, &fds, &timeout); 1706 ret = do_select(n, &fds, timeout);
1677
1678 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
1679 time_t sec = 0, usec = 0;
1680 if (timeout) {
1681 sec = timeout / HZ;
1682 usec = timeout % HZ;
1683 usec *= (1000000/HZ);
1684 }
1685 if (put_user(sec, &tvp->tv_sec) ||
1686 put_user(usec, &tvp->tv_usec))
1687 ret = -EFAULT;
1688 }
1689 1707
1690 if (ret < 0) 1708 if (ret < 0)
1691 goto out; 1709 goto out;
@@ -1706,6 +1724,224 @@ out_nofds:
1706 return ret; 1724 return ret;
1707} 1725}
1708 1726
1727asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1728 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1729 struct compat_timeval __user *tvp)
1730{
1731 s64 timeout = -1;
1732 struct compat_timeval tv;
1733 int ret;
1734
1735 if (tvp) {
1736 if (copy_from_user(&tv, tvp, sizeof(tv)))
1737 return -EFAULT;
1738
1739 if (tv.tv_sec < 0 || tv.tv_usec < 0)
1740 return -EINVAL;
1741
1742 /* Cast to u64 to make GCC stop complaining */
1743 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1744 timeout = -1; /* infinite */
1745 else {
1746 timeout = ROUND_UP(tv.tv_usec, 1000000/HZ);
1747 timeout += tv.tv_sec * HZ;
1748 }
1749 }
1750
1751 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1752
1753 if (tvp) {
1754 if (current->personality & STICKY_TIMEOUTS)
1755 goto sticky;
1756 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1757 tv.tv_sec = timeout;
1758 if (copy_to_user(tvp, &tv, sizeof(tv))) {
1759sticky:
1760 /*
1761 * If an application puts its timeval in read-only
1762 * memory, we don't want the Linux-specific update to
1763 * the timeval to cause a fault after the select has
1764 * completed successfully. However, because we're not
1765 * updating the timeval, we can't restart the system
1766 * call.
1767 */
1768 if (ret == -ERESTARTNOHAND)
1769 ret = -EINTR;
1770 }
1771 }
1772
1773 return ret;
1774}
1775
1776#ifdef TIF_RESTORE_SIGMASK
1777asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1778 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1779 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
1780 compat_size_t sigsetsize)
1781{
1782 compat_sigset_t ss32;
1783 sigset_t ksigmask, sigsaved;
1784 long timeout = MAX_SCHEDULE_TIMEOUT;
1785 struct compat_timespec ts;
1786 int ret;
1787
1788 if (tsp) {
1789 if (copy_from_user(&ts, tsp, sizeof(ts)))
1790 return -EFAULT;
1791
1792 if (ts.tv_sec < 0 || ts.tv_nsec < 0)
1793 return -EINVAL;
1794 }
1795
1796 if (sigmask) {
1797 if (sigsetsize != sizeof(compat_sigset_t))
1798 return -EINVAL;
1799 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1800 return -EFAULT;
1801 sigset_from_compat(&ksigmask, &ss32);
1802
1803 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1804 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1805 }
1806
1807 do {
1808 if (tsp) {
1809 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1810 timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1811 timeout += ts.tv_sec * (unsigned long)HZ;
1812 ts.tv_sec = 0;
1813 ts.tv_nsec = 0;
1814 } else {
1815 ts.tv_sec -= MAX_SELECT_SECONDS;
1816 timeout = MAX_SELECT_SECONDS * HZ;
1817 }
1818 }
1819
1820 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1821
1822 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1823
1824 if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
1825 ts.tv_sec += timeout / HZ;
1826 ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
1827 if (ts.tv_nsec >= 1000000000) {
1828 ts.tv_sec++;
1829 ts.tv_nsec -= 1000000000;
1830 }
1831 (void)copy_to_user(tsp, &ts, sizeof(ts));
1832 }
1833
1834 if (ret == -ERESTARTNOHAND) {
1835 /*
1836 * Don't restore the signal mask yet. Let do_signal() deliver
1837 * the signal on the way back to userspace, before the signal
1838 * mask is restored.
1839 */
1840 if (sigmask) {
1841 memcpy(&current->saved_sigmask, &sigsaved,
1842 sizeof(sigsaved));
1843 set_thread_flag(TIF_RESTORE_SIGMASK);
1844 }
1845 } else if (sigmask)
1846 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1847
1848 return ret;
1849}
1850
1851asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
1852 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1853 struct compat_timespec __user *tsp, void __user *sig)
1854{
1855 compat_size_t sigsetsize = 0;
1856 compat_uptr_t up = 0;
1857
1858 if (sig) {
1859 if (!access_ok(VERIFY_READ, sig,
1860 sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
1861 __get_user(up, (compat_uptr_t __user *)sig) ||
1862 __get_user(sigsetsize,
1863 (compat_size_t __user *)(sig+sizeof(up))))
1864 return -EFAULT;
1865 }
1866 return compat_sys_pselect7(n, inp, outp, exp, tsp, compat_ptr(up),
1867 sigsetsize);
1868}
1869
1870asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1871 unsigned int nfds, struct compat_timespec __user *tsp,
1872 const compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
1873{
1874 compat_sigset_t ss32;
1875 sigset_t ksigmask, sigsaved;
1876 struct compat_timespec ts;
1877 s64 timeout = -1;
1878 int ret;
1879
1880 if (tsp) {
1881 if (copy_from_user(&ts, tsp, sizeof(ts)))
1882 return -EFAULT;
1883
1884 /* We assume that ts.tv_sec is always lower than
1885 the number of seconds that can be expressed in
1886 an s64. Otherwise the compiler bitches at us */
1887 timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1888 timeout += ts.tv_sec * HZ;
1889 }
1890
1891 if (sigmask) {
1892 if (sigsetsize |= sizeof(compat_sigset_t))
1893 return -EINVAL;
1894 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1895 return -EFAULT;
1896 sigset_from_compat(&ksigmask, &ss32);
1897
1898 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1899 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1900 }
1901
1902 ret = do_sys_poll(ufds, nfds, &timeout);
1903
1904 /* We can restart this syscall, usually */
1905 if (ret == -EINTR) {
1906 /*
1907 * Don't restore the signal mask yet. Let do_signal() deliver
1908 * the signal on the way back to userspace, before the signal
1909 * mask is restored.
1910 */
1911 if (sigmask) {
1912 memcpy(&current->saved_sigmask, &sigsaved,
1913 sizeof(sigsaved));
1914 set_thread_flag(TIF_RESTORE_SIGMASK);
1915 }
1916 ret = -ERESTARTNOHAND;
1917 } else if (sigmask)
1918 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1919
1920 if (tsp && timeout >= 0) {
1921 if (current->personality & STICKY_TIMEOUTS)
1922 goto sticky;
1923 /* Yes, we know it's actually an s64, but it's also positive. */
1924 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
1925 ts.tv_sec = timeout;
1926 if (copy_to_user(tsp, &ts, sizeof(ts))) {
1927sticky:
1928 /*
1929 * If an application puts its timeval in read-only
1930 * memory, we don't want the Linux-specific update to
1931 * the timeval to cause a fault after the select has
1932 * completed successfully. However, because we're not
1933 * updating the timeval, we can't restart the system
1934 * call.
1935 */
1936 if (ret == -ERESTARTNOHAND && timeout >= 0)
1937 ret = -EINTR;
1938 }
1939 }
1940
1941 return ret;
1942}
1943#endif /* TIF_RESTORE_SIGMASK */
1944
1709#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1945#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
1710/* Stuff for NFS server syscalls... */ 1946/* Stuff for NFS server syscalls... */
1711struct compat_nfsctl_svc { 1947struct compat_nfsctl_svc {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 43a2508ac696..5dd0207ffd46 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -10,11 +10,11 @@
10 * ioctls. 10 * ioctls.
11 */ 11 */
12 12
13#ifdef INCLUDES
14#include <linux/config.h> 13#include <linux/config.h>
15#include <linux/types.h> 14#include <linux/types.h>
16#include <linux/compat.h> 15#include <linux/compat.h>
17#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/capability.h>
18#include <linux/compiler.h> 18#include <linux/compiler.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/smp.h> 20#include <linux/smp.h>
@@ -81,13 +81,9 @@
81#include <linux/capi.h> 81#include <linux/capi.h>
82 82
83#include <scsi/scsi.h> 83#include <scsi/scsi.h>
84/* Ugly hack. */
85#undef __KERNEL__
86#include <scsi/scsi_ioctl.h> 84#include <scsi/scsi_ioctl.h>
87#define __KERNEL__
88#include <scsi/sg.h> 85#include <scsi/sg.h>
89 86
90#include <asm/types.h>
91#include <asm/uaccess.h> 87#include <asm/uaccess.h>
92#include <linux/ethtool.h> 88#include <linux/ethtool.h>
93#include <linux/mii.h> 89#include <linux/mii.h>
@@ -95,7 +91,6 @@
95#include <linux/watchdog.h> 91#include <linux/watchdog.h>
96#include <linux/dm-ioctl.h> 92#include <linux/dm-ioctl.h>
97 93
98#include <asm/module.h>
99#include <linux/soundcard.h> 94#include <linux/soundcard.h>
100#include <linux/lp.h> 95#include <linux/lp.h>
101#include <linux/ppdev.h> 96#include <linux/ppdev.h>
@@ -127,11 +122,7 @@
127#include <linux/dvb/dmx.h> 122#include <linux/dvb/dmx.h>
128#include <linux/dvb/frontend.h> 123#include <linux/dvb/frontend.h>
129#include <linux/dvb/video.h> 124#include <linux/dvb/video.h>
130 125#include <linux/lp.h>
131#undef INCLUDES
132#endif
133
134#ifdef CODE
135 126
136/* Aiee. Someone does not find a difference between int and long */ 127/* Aiee. Someone does not find a difference between int and long */
137#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) 128#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int)
@@ -148,6 +139,12 @@
148#define EXT2_IOC32_GETVERSION _IOR('v', 1, int) 139#define EXT2_IOC32_GETVERSION _IOR('v', 1, int)
149#define EXT2_IOC32_SETVERSION _IOW('v', 2, int) 140#define EXT2_IOC32_SETVERSION _IOW('v', 2, int)
150 141
142static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
143 unsigned long arg, struct file *f)
144{
145 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
146}
147
151static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg) 148static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
152{ 149{
153 mm_segment_t old_fs = get_fs(); 150 mm_segment_t old_fs = get_fs();
@@ -207,244 +204,6 @@ static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
207 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); 204 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
208} 205}
209 206
210struct video_tuner32 {
211 compat_int_t tuner;
212 char name[32];
213 compat_ulong_t rangelow, rangehigh;
214 u32 flags; /* It is really u32 in videodev.h */
215 u16 mode, signal;
216};
217
218static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
219{
220 int i;
221
222 if(get_user(kp->tuner, &up->tuner))
223 return -EFAULT;
224 for(i = 0; i < 32; i++)
225 __get_user(kp->name[i], &up->name[i]);
226 __get_user(kp->rangelow, &up->rangelow);
227 __get_user(kp->rangehigh, &up->rangehigh);
228 __get_user(kp->flags, &up->flags);
229 __get_user(kp->mode, &up->mode);
230 __get_user(kp->signal, &up->signal);
231 return 0;
232}
233
234static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up)
235{
236 int i;
237
238 if(put_user(kp->tuner, &up->tuner))
239 return -EFAULT;
240 for(i = 0; i < 32; i++)
241 __put_user(kp->name[i], &up->name[i]);
242 __put_user(kp->rangelow, &up->rangelow);
243 __put_user(kp->rangehigh, &up->rangehigh);
244 __put_user(kp->flags, &up->flags);
245 __put_user(kp->mode, &up->mode);
246 __put_user(kp->signal, &up->signal);
247 return 0;
248}
249
250struct video_buffer32 {
251 compat_caddr_t base;
252 compat_int_t height, width, depth, bytesperline;
253};
254
255static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
256{
257 u32 tmp;
258
259 if (get_user(tmp, &up->base))
260 return -EFAULT;
261
262 /* This is actually a physical address stored
263 * as a void pointer.
264 */
265 kp->base = (void *)(unsigned long) tmp;
266
267 __get_user(kp->height, &up->height);
268 __get_user(kp->width, &up->width);
269 __get_user(kp->depth, &up->depth);
270 __get_user(kp->bytesperline, &up->bytesperline);
271 return 0;
272}
273
274static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up)
275{
276 u32 tmp = (u32)((unsigned long)kp->base);
277
278 if(put_user(tmp, &up->base))
279 return -EFAULT;
280 __put_user(kp->height, &up->height);
281 __put_user(kp->width, &up->width);
282 __put_user(kp->depth, &up->depth);
283 __put_user(kp->bytesperline, &up->bytesperline);
284 return 0;
285}
286
287struct video_clip32 {
288 s32 x, y, width, height; /* Its really s32 in videodev.h */
289 compat_caddr_t next;
290};
291
292struct video_window32 {
293 u32 x, y, width, height, chromakey, flags;
294 compat_caddr_t clips;
295 compat_int_t clipcount;
296};
297
298/* You get back everything except the clips... */
299static int put_video_window32(struct video_window *kp, struct video_window32 __user *up)
300{
301 if(put_user(kp->x, &up->x))
302 return -EFAULT;
303 __put_user(kp->y, &up->y);
304 __put_user(kp->width, &up->width);
305 __put_user(kp->height, &up->height);
306 __put_user(kp->chromakey, &up->chromakey);
307 __put_user(kp->flags, &up->flags);
308 __put_user(kp->clipcount, &up->clipcount);
309 return 0;
310}
311
312#define VIDIOCGTUNER32 _IOWR('v',4, struct video_tuner32)
313#define VIDIOCSTUNER32 _IOW('v',5, struct video_tuner32)
314#define VIDIOCGWIN32 _IOR('v',9, struct video_window32)
315#define VIDIOCSWIN32 _IOW('v',10, struct video_window32)
316#define VIDIOCGFBUF32 _IOR('v',11, struct video_buffer32)
317#define VIDIOCSFBUF32 _IOW('v',12, struct video_buffer32)
318#define VIDIOCGFREQ32 _IOR('v',14, u32)
319#define VIDIOCSFREQ32 _IOW('v',15, u32)
320
321enum {
322 MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip)
323};
324
325static int do_set_window(unsigned int fd, unsigned int cmd, unsigned long arg)
326{
327 struct video_window32 __user *up = compat_ptr(arg);
328 struct video_window __user *vw;
329 struct video_clip __user *p;
330 int nclips;
331 u32 n;
332
333 if (get_user(nclips, &up->clipcount))
334 return -EFAULT;
335
336 /* Peculiar interface... */
337 if (nclips < 0)
338 nclips = VIDEO_CLIPMAP_SIZE;
339
340 if (nclips > MaxClips)
341 return -ENOMEM;
342
343 vw = compat_alloc_user_space(sizeof(struct video_window) +
344 nclips * sizeof(struct video_clip));
345
346 p = nclips ? (struct video_clip __user *)(vw + 1) : NULL;
347
348 if (get_user(n, &up->x) || put_user(n, &vw->x) ||
349 get_user(n, &up->y) || put_user(n, &vw->y) ||
350 get_user(n, &up->width) || put_user(n, &vw->width) ||
351 get_user(n, &up->height) || put_user(n, &vw->height) ||
352 get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) ||
353 get_user(n, &up->flags) || put_user(n, &vw->flags) ||
354 get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) ||
355 get_user(n, &up->clips) || put_user(p, &vw->clips))
356 return -EFAULT;
357
358 if (nclips) {
359 struct video_clip32 __user *u = compat_ptr(n);
360 int i;
361 if (!u)
362 return -EINVAL;
363 for (i = 0; i < nclips; i++, u++, p++) {
364 s32 v;
365 if (get_user(v, &u->x) ||
366 put_user(v, &p->x) ||
367 get_user(v, &u->y) ||
368 put_user(v, &p->y) ||
369 get_user(v, &u->width) ||
370 put_user(v, &p->width) ||
371 get_user(v, &u->height) ||
372 put_user(v, &p->height) ||
373 put_user(NULL, &p->next))
374 return -EFAULT;
375 }
376 }
377
378 return sys_ioctl(fd, VIDIOCSWIN, (unsigned long)p);
379}
380
381static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
382{
383 union {
384 struct video_tuner vt;
385 struct video_buffer vb;
386 struct video_window vw;
387 unsigned long vx;
388 } karg;
389 mm_segment_t old_fs = get_fs();
390 void __user *up = compat_ptr(arg);
391 int err = 0;
392
393 /* First, convert the command. */
394 switch(cmd) {
395 case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
396 case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
397 case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
398 case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
399 case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
400 case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
401 case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
402 };
403
404 switch(cmd) {
405 case VIDIOCSTUNER:
406 case VIDIOCGTUNER:
407 err = get_video_tuner32(&karg.vt, up);
408 break;
409
410 case VIDIOCSFBUF:
411 err = get_video_buffer32(&karg.vb, up);
412 break;
413
414 case VIDIOCSFREQ:
415 err = get_user(karg.vx, (u32 __user *)up);
416 break;
417 };
418 if(err)
419 goto out;
420
421 set_fs(KERNEL_DS);
422 err = sys_ioctl(fd, cmd, (unsigned long)&karg);
423 set_fs(old_fs);
424
425 if(err == 0) {
426 switch(cmd) {
427 case VIDIOCGTUNER:
428 err = put_video_tuner32(&karg.vt, up);
429 break;
430
431 case VIDIOCGWIN:
432 err = put_video_window32(&karg.vw, up);
433 break;
434
435 case VIDIOCGFBUF:
436 err = put_video_buffer32(&karg.vb, up);
437 break;
438
439 case VIDIOCGFREQ:
440 err = put_user(((u32)karg.vx), (u32 __user *)up);
441 break;
442 };
443 }
444out:
445 return err;
446}
447
448struct compat_dmx_event { 207struct compat_dmx_event {
449 dmx_event_t event; 208 dmx_event_t event;
450 compat_time_t timeStamp; 209 compat_time_t timeStamp;
@@ -1158,6 +917,40 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1158 return err; 917 return err;
1159} 918}
1160 919
920struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
921 char req_state;
922 char orphan;
923 char sg_io_owned;
924 char problem;
925 int pack_id;
926 compat_uptr_t usr_ptr;
927 unsigned int duration;
928 int unused;
929};
930
931static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
932{
933 int err, i;
934 sg_req_info_t *r;
935 struct compat_sg_req_info *o = (struct compat_sg_req_info *)arg;
936 r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
937 err = sys_ioctl(fd,cmd,(unsigned long)r);
938 if (err < 0)
939 return err;
940 for (i = 0; i < SG_MAX_QUEUE; i++) {
941 void __user *ptr;
942 int d;
943
944 if (copy_in_user(o + i, r + i, offsetof(sg_req_info_t, usr_ptr)) ||
945 get_user(ptr, &r[i].usr_ptr) ||
946 get_user(d, &r[i].duration) ||
947 put_user((u32)(unsigned long)(ptr), &o[i].usr_ptr) ||
948 put_user(d, &o[i].duration))
949 return -EFAULT;
950 }
951 return err;
952}
953
1161struct sock_fprog32 { 954struct sock_fprog32 {
1162 unsigned short len; 955 unsigned short len;
1163 compat_caddr_t filter; 956 compat_caddr_t filter;
@@ -2713,6 +2506,49 @@ static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg
2713 return -EINVAL; 2506 return -EINVAL;
2714} 2507}
2715 2508
2509#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
2510#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t)
2511#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
2512#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t)
2513
2514static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
2515{
2516 mm_segment_t oldfs = get_fs();
2517 compat_ulong_t val32;
2518 unsigned long kval;
2519 int ret;
2520
2521 switch (cmd) {
2522 case RTC_IRQP_READ32:
2523 case RTC_EPOCH_READ32:
2524 set_fs(KERNEL_DS);
2525 ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ?
2526 RTC_IRQP_READ : RTC_EPOCH_READ,
2527 (unsigned long)&kval);
2528 set_fs(oldfs);
2529 if (ret)
2530 return ret;
2531 val32 = kval;
2532 return put_user(val32, (unsigned int __user *)arg);
2533 case RTC_IRQP_SET32:
2534 case RTC_EPOCH_SET32:
2535 ret = get_user(val32, (unsigned int __user *)arg);
2536 if (ret)
2537 return ret;
2538 kval = val32;
2539
2540 set_fs(KERNEL_DS);
2541 ret = sys_ioctl(fd, (cmd == RTC_IRQP_SET32) ?
2542 RTC_IRQP_SET : RTC_EPOCH_SET,
2543 (unsigned long)&kval);
2544 set_fs(oldfs);
2545 return ret;
2546 default:
2547 /* unreached */
2548 return -ENOIOCTLCMD;
2549 }
2550}
2551
2716#if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE) 2552#if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE)
2717struct ncp_ioctl_request_32 { 2553struct ncp_ioctl_request_32 {
2718 u32 function; 2554 u32 function;
@@ -2900,10 +2736,34 @@ static int do_ncp_setprivatedata(unsigned int fd, unsigned int cmd, unsigned lon
2900} 2736}
2901#endif 2737#endif
2902 2738
2903#undef CODE 2739static int
2904#endif 2740lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
2741{
2742 struct compat_timeval *tc = (struct compat_timeval *)arg;
2743 struct timeval *tn = compat_alloc_user_space(sizeof(struct timeval));
2744 struct timeval ts;
2745 if (get_user(ts.tv_sec, &tc->tv_sec) ||
2746 get_user(ts.tv_usec, &tc->tv_usec) ||
2747 put_user(ts.tv_sec, &tn->tv_sec) ||
2748 put_user(ts.tv_usec, &tn->tv_usec))
2749 return -EFAULT;
2750 return sys_ioctl(fd, cmd, (unsigned long)tn);
2751}
2752
2753#define HANDLE_IOCTL(cmd,handler) \
2754 { (cmd), (ioctl_trans_handler_t)(handler) },
2755
2756/* pointer to compatible structure or no argument */
2757#define COMPATIBLE_IOCTL(cmd) \
2758 { (cmd), do_ioctl32_pointer },
2759
2760/* argument is an unsigned long integer, not a pointer */
2761#define ULONG_IOCTL(cmd) \
2762 { (cmd), (ioctl_trans_handler_t)sys_ioctl },
2905 2763
2906#ifdef DECLARES 2764
2765struct ioctl_trans ioctl_start[] = {
2766#include <linux/compat_ioctl.h>
2907HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) 2767HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
2908HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) 2768HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
2909#ifdef CONFIG_NET 2769#ifdef CONFIG_NET
@@ -2983,6 +2843,7 @@ HANDLE_IOCTL(FDPOLLDRVSTAT32, fd_ioctl_trans)
2983HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans) 2843HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
2984HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans) 2844HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
2985HANDLE_IOCTL(SG_IO,sg_ioctl_trans) 2845HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
2846HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
2986HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans) 2847HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
2987HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans) 2848HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
2988HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans) 2849HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
@@ -3015,14 +2876,6 @@ COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
3015#ifdef CONFIG_JBD_DEBUG 2876#ifdef CONFIG_JBD_DEBUG
3016HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl) 2877HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
3017#endif 2878#endif
3018HANDLE_IOCTL(VIDIOCGTUNER32, do_video_ioctl)
3019HANDLE_IOCTL(VIDIOCSTUNER32, do_video_ioctl)
3020HANDLE_IOCTL(VIDIOCGWIN32, do_video_ioctl)
3021HANDLE_IOCTL(VIDIOCSWIN32, do_set_window)
3022HANDLE_IOCTL(VIDIOCGFBUF32, do_video_ioctl)
3023HANDLE_IOCTL(VIDIOCSFBUF32, do_video_ioctl)
3024HANDLE_IOCTL(VIDIOCGFREQ32, do_video_ioctl)
3025HANDLE_IOCTL(VIDIOCSFREQ32, do_video_ioctl)
3026/* One SMB ioctl needs translations. */ 2879/* One SMB ioctl needs translations. */
3027#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) 2880#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
3028HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid) 2881HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
@@ -3104,6 +2957,10 @@ HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl)
3104HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl) 2957HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl)
3105HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) 2958HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
3106HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) 2959HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
2960HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
2961HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
2962HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
2963HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl)
3107 2964
3108#if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE) 2965#if defined(CONFIG_NCP_FS) || defined(CONFIG_NCP_FS_MODULE)
3109HANDLE_IOCTL(NCP_IOC_NCPREQUEST_32, do_ncp_ncprequest) 2966HANDLE_IOCTL(NCP_IOC_NCPREQUEST_32, do_ncp_ncprequest)
@@ -3121,5 +2978,19 @@ HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event)
3121HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture) 2978HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture)
3122HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette) 2979HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette)
3123 2980
3124#undef DECLARES 2981/* parport */
3125#endif 2982COMPATIBLE_IOCTL(LPTIME)
2983COMPATIBLE_IOCTL(LPCHAR)
2984COMPATIBLE_IOCTL(LPABORTOPEN)
2985COMPATIBLE_IOCTL(LPCAREFUL)
2986COMPATIBLE_IOCTL(LPWAIT)
2987COMPATIBLE_IOCTL(LPSETIRQ)
2988COMPATIBLE_IOCTL(LPGETSTATUS)
2989COMPATIBLE_IOCTL(LPGETSTATUS)
2990COMPATIBLE_IOCTL(LPRESET)
2991/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/
2992COMPATIBLE_IOCTL(LPGETFLAGS)
2993HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
2994};
2995
2996int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index e48b539243a1..b668ec61527e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -288,10 +288,10 @@ static struct dentry * configfs_lookup(struct inode *dir,
288 288
289/* 289/*
290 * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are 290 * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are
291 * attributes and are removed by rmdir(). We recurse, taking i_sem 291 * attributes and are removed by rmdir(). We recurse, taking i_mutex
292 * on all children that are candidates for default detach. If the 292 * on all children that are candidates for default detach. If the
293 * result is clean, then configfs_detach_group() will handle dropping 293 * result is clean, then configfs_detach_group() will handle dropping
294 * i_sem. If there is an error, the caller will clean up the i_sem 294 * i_mutex. If there is an error, the caller will clean up the i_mutex
295 * holders via configfs_detach_rollback(). 295 * holders via configfs_detach_rollback().
296 */ 296 */
297static int configfs_detach_prep(struct dentry *dentry) 297static int configfs_detach_prep(struct dentry *dentry)
@@ -309,8 +309,8 @@ static int configfs_detach_prep(struct dentry *dentry)
309 if (sd->s_type & CONFIGFS_NOT_PINNED) 309 if (sd->s_type & CONFIGFS_NOT_PINNED)
310 continue; 310 continue;
311 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 311 if (sd->s_type & CONFIGFS_USET_DEFAULT) {
312 down(&sd->s_dentry->d_inode->i_sem); 312 mutex_lock(&sd->s_dentry->d_inode->i_mutex);
313 /* Mark that we've taken i_sem */ 313 /* Mark that we've taken i_mutex */
314 sd->s_type |= CONFIGFS_USET_DROPPING; 314 sd->s_type |= CONFIGFS_USET_DROPPING;
315 315
316 ret = configfs_detach_prep(sd->s_dentry); 316 ret = configfs_detach_prep(sd->s_dentry);
@@ -327,7 +327,7 @@ out:
327} 327}
328 328
329/* 329/*
330 * Walk the tree, dropping i_sem wherever CONFIGFS_USET_DROPPING is 330 * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is
331 * set. 331 * set.
332 */ 332 */
333static void configfs_detach_rollback(struct dentry *dentry) 333static void configfs_detach_rollback(struct dentry *dentry)
@@ -341,7 +341,7 @@ static void configfs_detach_rollback(struct dentry *dentry)
341 341
342 if (sd->s_type & CONFIGFS_USET_DROPPING) { 342 if (sd->s_type & CONFIGFS_USET_DROPPING) {
343 sd->s_type &= ~CONFIGFS_USET_DROPPING; 343 sd->s_type &= ~CONFIGFS_USET_DROPPING;
344 up(&sd->s_dentry->d_inode->i_sem); 344 mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
345 } 345 }
346 } 346 }
347 } 347 }
@@ -424,11 +424,11 @@ static void detach_groups(struct config_group *group)
424 424
425 /* 425 /*
426 * From rmdir/unregister, a configfs_detach_prep() pass 426 * From rmdir/unregister, a configfs_detach_prep() pass
427 * has taken our i_sem for us. Drop it. 427 * has taken our i_mutex for us. Drop it.
428 * From mkdir/register cleanup, there is no sem held. 428 * From mkdir/register cleanup, there is no sem held.
429 */ 429 */
430 if (sd->s_type & CONFIGFS_USET_DROPPING) 430 if (sd->s_type & CONFIGFS_USET_DROPPING)
431 up(&child->d_inode->i_sem); 431 mutex_unlock(&child->d_inode->i_mutex);
432 432
433 d_delete(child); 433 d_delete(child);
434 dput(child); 434 dput(child);
@@ -493,11 +493,11 @@ static int populate_groups(struct config_group *group)
493 /* FYI, we're faking mkdir here 493 /* FYI, we're faking mkdir here
494 * I'm not sure we need this semaphore, as we're called 494 * I'm not sure we need this semaphore, as we're called
495 * from our parent's mkdir. That holds our parent's 495 * from our parent's mkdir. That holds our parent's
496 * i_sem, so afaik lookup cannot continue through our 496 * i_mutex, so afaik lookup cannot continue through our
497 * parent to find us, let alone mess with our tree. 497 * parent to find us, let alone mess with our tree.
498 * That said, taking our i_sem is closer to mkdir 498 * That said, taking our i_mutex is closer to mkdir
499 * emulation, and shouldn't hurt. */ 499 * emulation, and shouldn't hurt. */
500 down(&dentry->d_inode->i_sem); 500 mutex_lock(&dentry->d_inode->i_mutex);
501 501
502 for (i = 0; group->default_groups[i]; i++) { 502 for (i = 0; group->default_groups[i]; i++) {
503 new_group = group->default_groups[i]; 503 new_group = group->default_groups[i];
@@ -507,7 +507,7 @@ static int populate_groups(struct config_group *group)
507 break; 507 break;
508 } 508 }
509 509
510 up(&dentry->d_inode->i_sem); 510 mutex_unlock(&dentry->d_inode->i_mutex);
511 } 511 }
512 512
513 if (ret) 513 if (ret)
@@ -856,7 +856,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
856 down_write(&configfs_rename_sem); 856 down_write(&configfs_rename_sem);
857 parent = item->parent->dentry; 857 parent = item->parent->dentry;
858 858
859 down(&parent->d_inode->i_sem); 859 mutex_lock(&parent->d_inode->i_mutex);
860 860
861 new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); 861 new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
862 if (!IS_ERR(new_dentry)) { 862 if (!IS_ERR(new_dentry)) {
@@ -872,7 +872,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
872 error = -EEXIST; 872 error = -EEXIST;
873 dput(new_dentry); 873 dput(new_dentry);
874 } 874 }
875 up(&parent->d_inode->i_sem); 875 mutex_unlock(&parent->d_inode->i_mutex);
876 up_write(&configfs_rename_sem); 876 up_write(&configfs_rename_sem);
877 877
878 return error; 878 return error;
@@ -884,9 +884,9 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
884 struct dentry * dentry = file->f_dentry; 884 struct dentry * dentry = file->f_dentry;
885 struct configfs_dirent * parent_sd = dentry->d_fsdata; 885 struct configfs_dirent * parent_sd = dentry->d_fsdata;
886 886
887 down(&dentry->d_inode->i_sem); 887 mutex_lock(&dentry->d_inode->i_mutex);
888 file->private_data = configfs_new_dirent(parent_sd, NULL); 888 file->private_data = configfs_new_dirent(parent_sd, NULL);
889 up(&dentry->d_inode->i_sem); 889 mutex_unlock(&dentry->d_inode->i_mutex);
890 890
891 return file->private_data ? 0 : -ENOMEM; 891 return file->private_data ? 0 : -ENOMEM;
892 892
@@ -897,9 +897,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
897 struct dentry * dentry = file->f_dentry; 897 struct dentry * dentry = file->f_dentry;
898 struct configfs_dirent * cursor = file->private_data; 898 struct configfs_dirent * cursor = file->private_data;
899 899
900 down(&dentry->d_inode->i_sem); 900 mutex_lock(&dentry->d_inode->i_mutex);
901 list_del_init(&cursor->s_sibling); 901 list_del_init(&cursor->s_sibling);
902 up(&dentry->d_inode->i_sem); 902 mutex_unlock(&dentry->d_inode->i_mutex);
903 903
904 release_configfs_dirent(cursor); 904 release_configfs_dirent(cursor);
905 905
@@ -975,7 +975,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
975{ 975{
976 struct dentry * dentry = file->f_dentry; 976 struct dentry * dentry = file->f_dentry;
977 977
978 down(&dentry->d_inode->i_sem); 978 mutex_lock(&dentry->d_inode->i_mutex);
979 switch (origin) { 979 switch (origin) {
980 case 1: 980 case 1:
981 offset += file->f_pos; 981 offset += file->f_pos;
@@ -983,7 +983,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
983 if (offset >= 0) 983 if (offset >= 0)
984 break; 984 break;
985 default: 985 default:
986 up(&file->f_dentry->d_inode->i_sem); 986 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
987 return -EINVAL; 987 return -EINVAL;
988 } 988 }
989 if (offset != file->f_pos) { 989 if (offset != file->f_pos) {
@@ -1007,7 +1007,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
1007 list_add_tail(&cursor->s_sibling, p); 1007 list_add_tail(&cursor->s_sibling, p);
1008 } 1008 }
1009 } 1009 }
1010 up(&dentry->d_inode->i_sem); 1010 mutex_unlock(&dentry->d_inode->i_mutex);
1011 return offset; 1011 return offset;
1012} 1012}
1013 1013
@@ -1037,7 +1037,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1037 sd = configfs_sb->s_root->d_fsdata; 1037 sd = configfs_sb->s_root->d_fsdata;
1038 link_group(to_config_group(sd->s_element), group); 1038 link_group(to_config_group(sd->s_element), group);
1039 1039
1040 down(&configfs_sb->s_root->d_inode->i_sem); 1040 mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
1041 1041
1042 name.name = group->cg_item.ci_name; 1042 name.name = group->cg_item.ci_name;
1043 name.len = strlen(name.name); 1043 name.len = strlen(name.name);
@@ -1057,7 +1057,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1057 else 1057 else
1058 d_delete(dentry); 1058 d_delete(dentry);
1059 1059
1060 up(&configfs_sb->s_root->d_inode->i_sem); 1060 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
1061 1061
1062 if (dentry) { 1062 if (dentry) {
1063 dput(dentry); 1063 dput(dentry);
@@ -1079,18 +1079,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1079 return; 1079 return;
1080 } 1080 }
1081 1081
1082 down(&configfs_sb->s_root->d_inode->i_sem); 1082 mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
1083 down(&dentry->d_inode->i_sem); 1083 mutex_lock(&dentry->d_inode->i_mutex);
1084 if (configfs_detach_prep(dentry)) { 1084 if (configfs_detach_prep(dentry)) {
1085 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); 1085 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
1086 } 1086 }
1087 configfs_detach_group(&group->cg_item); 1087 configfs_detach_group(&group->cg_item);
1088 dentry->d_inode->i_flags |= S_DEAD; 1088 dentry->d_inode->i_flags |= S_DEAD;
1089 up(&dentry->d_inode->i_sem); 1089 mutex_unlock(&dentry->d_inode->i_mutex);
1090 1090
1091 d_delete(dentry); 1091 d_delete(dentry);
1092 1092
1093 up(&configfs_sb->s_root->d_inode->i_sem); 1093 mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
1094 1094
1095 dput(dentry); 1095 dput(dentry);
1096 1096
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index af1ffc9a15c0..c26cd61f13af 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -336,9 +336,9 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att
336 umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; 336 umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
337 int error = 0; 337 int error = 0;
338 338
339 down(&dir->d_inode->i_sem); 339 mutex_lock(&dir->d_inode->i_mutex);
340 error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); 340 error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
341 up(&dir->d_inode->i_sem); 341 mutex_unlock(&dir->d_inode->i_mutex);
342 342
343 return error; 343 return error;
344} 344}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 6b274c6d428f..6577c588de9d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -122,7 +122,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd)
122 122
123/* 123/*
124 * Unhashes the dentry corresponding to given configfs_dirent 124 * Unhashes the dentry corresponding to given configfs_dirent
125 * Called with parent inode's i_sem held. 125 * Called with parent inode's i_mutex held.
126 */ 126 */
127void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) 127void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
128{ 128{
@@ -145,7 +145,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
145 struct configfs_dirent * sd; 145 struct configfs_dirent * sd;
146 struct configfs_dirent * parent_sd = dir->d_fsdata; 146 struct configfs_dirent * parent_sd = dir->d_fsdata;
147 147
148 down(&dir->d_inode->i_sem); 148 mutex_lock(&dir->d_inode->i_mutex);
149 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 149 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
150 if (!sd->s_element) 150 if (!sd->s_element)
151 continue; 151 continue;
@@ -156,7 +156,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
156 break; 156 break;
157 } 157 }
158 } 158 }
159 up(&dir->d_inode->i_sem); 159 mutex_unlock(&dir->d_inode->i_mutex);
160} 160}
161 161
162 162
diff --git a/fs/dcache.c b/fs/dcache.c
index 17e439138681..86bdb93789c6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = {
71 71
72static void d_callback(struct rcu_head *head) 72static void d_callback(struct rcu_head *head)
73{ 73{
74 struct dentry * dentry = container_of(head, struct dentry, d_rcu); 74 struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
75 75
76 if (dname_external(dentry)) 76 if (dname_external(dentry))
77 kfree(dentry->d_name.name); 77 kfree(dentry->d_name.name);
@@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry)
86{ 86{
87 if (dentry->d_op && dentry->d_op->d_release) 87 if (dentry->d_op && dentry->d_op->d_release)
88 dentry->d_op->d_release(dentry); 88 dentry->d_op->d_release(dentry);
89 call_rcu(&dentry->d_rcu, d_callback); 89 call_rcu(&dentry->d_u.d_rcu, d_callback);
90} 90}
91 91
92/* 92/*
@@ -94,7 +94,7 @@ static void d_free(struct dentry *dentry)
94 * d_iput() operation if defined. 94 * d_iput() operation if defined.
95 * Called with dcache_lock and per dentry lock held, drops both. 95 * Called with dcache_lock and per dentry lock held, drops both.
96 */ 96 */
97static inline void dentry_iput(struct dentry * dentry) 97static void dentry_iput(struct dentry * dentry)
98{ 98{
99 struct inode *inode = dentry->d_inode; 99 struct inode *inode = dentry->d_inode;
100 if (inode) { 100 if (inode) {
@@ -193,7 +193,7 @@ kill_it: {
193 list_del(&dentry->d_lru); 193 list_del(&dentry->d_lru);
194 dentry_stat.nr_unused--; 194 dentry_stat.nr_unused--;
195 } 195 }
196 list_del(&dentry->d_child); 196 list_del(&dentry->d_u.d_child);
197 dentry_stat.nr_dentry--; /* For d_free, below */ 197 dentry_stat.nr_dentry--; /* For d_free, below */
198 /*drops the locks, at that point nobody can reach this dentry */ 198 /*drops the locks, at that point nobody can reach this dentry */
199 dentry_iput(dentry); 199 dentry_iput(dentry);
@@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
367 struct dentry * parent; 367 struct dentry * parent;
368 368
369 __d_drop(dentry); 369 __d_drop(dentry);
370 list_del(&dentry->d_child); 370 list_del(&dentry->d_u.d_child);
371 dentry_stat.nr_dentry--; /* For d_free, below */ 371 dentry_stat.nr_dentry--; /* For d_free, below */
372 dentry_iput(dentry); 372 dentry_iput(dentry);
373 parent = dentry->d_parent; 373 parent = dentry->d_parent;
@@ -518,7 +518,7 @@ repeat:
518resume: 518resume:
519 while (next != &this_parent->d_subdirs) { 519 while (next != &this_parent->d_subdirs) {
520 struct list_head *tmp = next; 520 struct list_head *tmp = next;
521 struct dentry *dentry = list_entry(tmp, struct dentry, d_child); 521 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
522 next = tmp->next; 522 next = tmp->next;
523 /* Have we found a mount point ? */ 523 /* Have we found a mount point ? */
524 if (d_mountpoint(dentry)) 524 if (d_mountpoint(dentry))
@@ -532,7 +532,7 @@ resume:
532 * All done at this level ... ascend and resume the search. 532 * All done at this level ... ascend and resume the search.
533 */ 533 */
534 if (this_parent != parent) { 534 if (this_parent != parent) {
535 next = this_parent->d_child.next; 535 next = this_parent->d_u.d_child.next;
536 this_parent = this_parent->d_parent; 536 this_parent = this_parent->d_parent;
537 goto resume; 537 goto resume;
538 } 538 }
@@ -569,7 +569,7 @@ repeat:
569resume: 569resume:
570 while (next != &this_parent->d_subdirs) { 570 while (next != &this_parent->d_subdirs) {
571 struct list_head *tmp = next; 571 struct list_head *tmp = next;
572 struct dentry *dentry = list_entry(tmp, struct dentry, d_child); 572 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
573 next = tmp->next; 573 next = tmp->next;
574 574
575 if (!list_empty(&dentry->d_lru)) { 575 if (!list_empty(&dentry->d_lru)) {
@@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found);
610 * All done at this level ... ascend and resume the search. 610 * All done at this level ... ascend and resume the search.
611 */ 611 */
612 if (this_parent != parent) { 612 if (this_parent != parent) {
613 next = this_parent->d_child.next; 613 next = this_parent->d_u.d_child.next;
614 this_parent = this_parent->d_parent; 614 this_parent = this_parent->d_parent;
615#ifdef DCACHE_DEBUG 615#ifdef DCACHE_DEBUG
616printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n", 616printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
@@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
753 dentry->d_parent = dget(parent); 753 dentry->d_parent = dget(parent);
754 dentry->d_sb = parent->d_sb; 754 dentry->d_sb = parent->d_sb;
755 } else { 755 } else {
756 INIT_LIST_HEAD(&dentry->d_child); 756 INIT_LIST_HEAD(&dentry->d_u.d_child);
757 } 757 }
758 758
759 spin_lock(&dcache_lock); 759 spin_lock(&dcache_lock);
760 if (parent) 760 if (parent)
761 list_add(&dentry->d_child, &parent->d_subdirs); 761 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
762 dentry_stat.nr_dentry++; 762 dentry_stat.nr_dentry++;
763 spin_unlock(&dcache_lock); 763 spin_unlock(&dcache_lock);
764 764
@@ -808,10 +808,14 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
808 * 808 *
809 * Fill in inode information in the entry. On success, it returns NULL. 809 * Fill in inode information in the entry. On success, it returns NULL.
810 * If an unhashed alias of "entry" already exists, then we return the 810 * If an unhashed alias of "entry" already exists, then we return the
811 * aliased dentry instead. 811 * aliased dentry instead and drop one reference to inode.
812 * 812 *
813 * Note that in order to avoid conflicts with rename() etc, the caller 813 * Note that in order to avoid conflicts with rename() etc, the caller
814 * had better be holding the parent directory semaphore. 814 * had better be holding the parent directory semaphore.
815 *
816 * This also assumes that the inode count has been incremented
817 * (or otherwise set) by the caller to indicate that it is now
818 * in use by the dcache.
815 */ 819 */
816struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) 820struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
817{ 821{
@@ -838,6 +842,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
838 dget_locked(alias); 842 dget_locked(alias);
839 spin_unlock(&dcache_lock); 843 spin_unlock(&dcache_lock);
840 BUG_ON(!d_unhashed(alias)); 844 BUG_ON(!d_unhashed(alias));
845 iput(inode);
841 return alias; 846 return alias;
842 } 847 }
843 list_add(&entry->d_alias, &inode->i_dentry); 848 list_add(&entry->d_alias, &inode->i_dentry);
@@ -1310,8 +1315,8 @@ already_unhashed:
1310 /* Unhash the target: dput() will then get rid of it */ 1315 /* Unhash the target: dput() will then get rid of it */
1311 __d_drop(target); 1316 __d_drop(target);
1312 1317
1313 list_del(&dentry->d_child); 1318 list_del(&dentry->d_u.d_child);
1314 list_del(&target->d_child); 1319 list_del(&target->d_u.d_child);
1315 1320
1316 /* Switch the names.. */ 1321 /* Switch the names.. */
1317 switch_names(dentry, target); 1322 switch_names(dentry, target);
@@ -1322,15 +1327,15 @@ already_unhashed:
1322 if (IS_ROOT(dentry)) { 1327 if (IS_ROOT(dentry)) {
1323 dentry->d_parent = target->d_parent; 1328 dentry->d_parent = target->d_parent;
1324 target->d_parent = target; 1329 target->d_parent = target;
1325 INIT_LIST_HEAD(&target->d_child); 1330 INIT_LIST_HEAD(&target->d_u.d_child);
1326 } else { 1331 } else {
1327 do_switch(dentry->d_parent, target->d_parent); 1332 do_switch(dentry->d_parent, target->d_parent);
1328 1333
1329 /* And add them back to the (new) parent lists */ 1334 /* And add them back to the (new) parent lists */
1330 list_add(&target->d_child, &target->d_parent->d_subdirs); 1335 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
1331 } 1336 }
1332 1337
1333 list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); 1338 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1334 spin_unlock(&target->d_lock); 1339 spin_unlock(&target->d_lock);
1335 spin_unlock(&dentry->d_lock); 1340 spin_unlock(&dentry->d_lock);
1336 write_sequnlock(&rename_lock); 1341 write_sequnlock(&rename_lock);
@@ -1568,7 +1573,7 @@ repeat:
1568resume: 1573resume:
1569 while (next != &this_parent->d_subdirs) { 1574 while (next != &this_parent->d_subdirs) {
1570 struct list_head *tmp = next; 1575 struct list_head *tmp = next;
1571 struct dentry *dentry = list_entry(tmp, struct dentry, d_child); 1576 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1572 next = tmp->next; 1577 next = tmp->next;
1573 if (d_unhashed(dentry)||!dentry->d_inode) 1578 if (d_unhashed(dentry)||!dentry->d_inode)
1574 continue; 1579 continue;
@@ -1579,7 +1584,7 @@ resume:
1579 atomic_dec(&dentry->d_count); 1584 atomic_dec(&dentry->d_count);
1580 } 1585 }
1581 if (this_parent != root) { 1586 if (this_parent != root) {
1582 next = this_parent->d_child.next; 1587 next = this_parent->d_u.d_child.next;
1583 atomic_dec(&this_parent->d_count); 1588 atomic_dec(&this_parent->d_count);
1584 this_parent = this_parent->d_parent; 1589 this_parent = this_parent->d_parent;
1585 goto resume; 1590 goto resume;
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 02aa0ddc582a..f8274a8f83bd 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/list.h> 19#include <linux/list.h>
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <linux/capability.h>
21#include <linux/dcache.h> 22#include <linux/dcache.h>
22#include <linux/mm.h> 23#include <linux/mm.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a86ac4aeaedb..d4f1a2cddd47 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -146,7 +146,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
146 } 146 }
147 147
148 *dentry = NULL; 148 *dentry = NULL;
149 down(&parent->d_inode->i_sem); 149 mutex_lock(&parent->d_inode->i_mutex);
150 *dentry = lookup_one_len(name, parent, strlen(name)); 150 *dentry = lookup_one_len(name, parent, strlen(name));
151 if (!IS_ERR(dentry)) { 151 if (!IS_ERR(dentry)) {
152 if ((mode & S_IFMT) == S_IFDIR) 152 if ((mode & S_IFMT) == S_IFDIR)
@@ -155,7 +155,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
155 error = debugfs_create(parent->d_inode, *dentry, mode); 155 error = debugfs_create(parent->d_inode, *dentry, mode);
156 } else 156 } else
157 error = PTR_ERR(dentry); 157 error = PTR_ERR(dentry);
158 up(&parent->d_inode->i_sem); 158 mutex_unlock(&parent->d_inode->i_mutex);
159 159
160 return error; 160 return error;
161} 161}
@@ -273,7 +273,7 @@ void debugfs_remove(struct dentry *dentry)
273 if (!parent || !parent->d_inode) 273 if (!parent || !parent->d_inode)
274 return; 274 return;
275 275
276 down(&parent->d_inode->i_sem); 276 mutex_lock(&parent->d_inode->i_mutex);
277 if (debugfs_positive(dentry)) { 277 if (debugfs_positive(dentry)) {
278 if (dentry->d_inode) { 278 if (dentry->d_inode) {
279 if (S_ISDIR(dentry->d_inode->i_mode)) 279 if (S_ISDIR(dentry->d_inode->i_mode))
@@ -283,7 +283,7 @@ void debugfs_remove(struct dentry *dentry)
283 dput(dentry); 283 dput(dentry);
284 } 284 }
285 } 285 }
286 up(&parent->d_inode->i_sem); 286 mutex_unlock(&parent->d_inode->i_mutex);
287 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 287 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
288} 288}
289EXPORT_SYMBOL_GPL(debugfs_remove); 289EXPORT_SYMBOL_GPL(debugfs_remove);
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index 1274422a5384..b621521e09d4 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2162,27 +2162,27 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
2162 * 2162 *
2163 * make sure that 2163 * make sure that
2164 * d_instantiate always runs under lock 2164 * d_instantiate always runs under lock
2165 * we release i_sem lock before going to sleep 2165 * we release i_mutex lock before going to sleep
2166 * 2166 *
2167 * unfortunately sometimes d_revalidate is called with 2167 * unfortunately sometimes d_revalidate is called with
2168 * and sometimes without i_sem lock held. The following checks 2168 * and sometimes without i_mutex lock held. The following checks
2169 * attempt to deduce when we need to add (and drop resp.) lock 2169 * attempt to deduce when we need to add (and drop resp.) lock
2170 * here. This relies on current (2.6.2) calling coventions: 2170 * here. This relies on current (2.6.2) calling coventions:
2171 * 2171 *
2172 * lookup_hash is always run under i_sem and is passing NULL 2172 * lookup_hash is always run under i_mutex and is passing NULL
2173 * as nd 2173 * as nd
2174 * 2174 *
2175 * open(...,O_CREATE,...) calls _lookup_hash under i_sem 2175 * open(...,O_CREATE,...) calls _lookup_hash under i_mutex
2176 * and sets flags to LOOKUP_OPEN|LOOKUP_CREATE 2176 * and sets flags to LOOKUP_OPEN|LOOKUP_CREATE
2177 * 2177 *
2178 * all other invocations of ->d_revalidate seem to happen 2178 * all other invocations of ->d_revalidate seem to happen
2179 * outside of i_sem 2179 * outside of i_mutex
2180 */ 2180 */
2181 need_lock = nd && 2181 need_lock = nd &&
2182 (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT)); 2182 (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT));
2183 2183
2184 if (need_lock) 2184 if (need_lock)
2185 down(&dir->i_sem); 2185 mutex_lock(&dir->i_mutex);
2186 2186
2187 if (is_devfsd_or_child(fs_info)) { 2187 if (is_devfsd_or_child(fs_info)) {
2188 devfs_handle_t de = lookup_info->de; 2188 devfs_handle_t de = lookup_info->de;
@@ -2221,9 +2221,9 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
2221 add_wait_queue(&lookup_info->wait_queue, &wait); 2221 add_wait_queue(&lookup_info->wait_queue, &wait);
2222 read_unlock(&parent->u.dir.lock); 2222 read_unlock(&parent->u.dir.lock);
2223 /* at this point it is always (hopefully) locked */ 2223 /* at this point it is always (hopefully) locked */
2224 up(&dir->i_sem); 2224 mutex_unlock(&dir->i_mutex);
2225 schedule(); 2225 schedule();
2226 down(&dir->i_sem); 2226 mutex_lock(&dir->i_mutex);
2227 /* 2227 /*
2228 * This does not need nor should remove wait from wait_queue. 2228 * This does not need nor should remove wait from wait_queue.
2229 * Wait queue head is never reused - nothing is ever added to it 2229 * Wait queue head is never reused - nothing is ever added to it
@@ -2238,7 +2238,7 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd)
2238 2238
2239 out: 2239 out:
2240 if (need_lock) 2240 if (need_lock)
2241 up(&dir->i_sem); 2241 mutex_unlock(&dir->i_mutex);
2242 return 1; 2242 return 1;
2243} /* End Function devfs_d_revalidate_wait */ 2243} /* End Function devfs_d_revalidate_wait */
2244 2244
@@ -2284,9 +2284,9 @@ static struct dentry *devfs_lookup(struct inode *dir, struct dentry *dentry,
2284 /* Unlock directory semaphore, which will release any waiters. They 2284 /* Unlock directory semaphore, which will release any waiters. They
2285 will get the hashed dentry, and may be forced to wait for 2285 will get the hashed dentry, and may be forced to wait for
2286 revalidation */ 2286 revalidation */
2287 up(&dir->i_sem); 2287 mutex_unlock(&dir->i_mutex);
2288 wait_for_devfsd_finished(fs_info); /* If I'm not devfsd, must wait */ 2288 wait_for_devfsd_finished(fs_info); /* If I'm not devfsd, must wait */
2289 down(&dir->i_sem); /* Grab it again because them's the rules */ 2289 mutex_lock(&dir->i_mutex); /* Grab it again because them's the rules */
2290 de = lookup_info.de; 2290 de = lookup_info.de;
2291 /* If someone else has been so kind as to make the inode, we go home 2291 /* If someone else has been so kind as to make the inode, we go home
2292 early */ 2292 early */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f2be44d4491f..bfb8a230bac9 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -130,7 +130,7 @@ static struct dentry *get_node(int num)
130{ 130{
131 char s[12]; 131 char s[12];
132 struct dentry *root = devpts_root; 132 struct dentry *root = devpts_root;
133 down(&root->d_inode->i_sem); 133 mutex_lock(&root->d_inode->i_mutex);
134 return lookup_one_len(s, root, sprintf(s, "%d", num)); 134 return lookup_one_len(s, root, sprintf(s, "%d", num));
135} 135}
136 136
@@ -161,7 +161,7 @@ int devpts_pty_new(struct tty_struct *tty)
161 if (!IS_ERR(dentry) && !dentry->d_inode) 161 if (!IS_ERR(dentry) && !dentry->d_inode)
162 d_instantiate(dentry, inode); 162 d_instantiate(dentry, inode);
163 163
164 up(&devpts_root->d_inode->i_sem); 164 mutex_unlock(&devpts_root->d_inode->i_mutex);
165 165
166 return 0; 166 return 0;
167} 167}
@@ -178,7 +178,7 @@ struct tty_struct *devpts_get_tty(int number)
178 dput(dentry); 178 dput(dentry);
179 } 179 }
180 180
181 up(&devpts_root->d_inode->i_sem); 181 mutex_unlock(&devpts_root->d_inode->i_mutex);
182 182
183 return tty; 183 return tty;
184} 184}
@@ -196,7 +196,7 @@ void devpts_pty_kill(int number)
196 } 196 }
197 dput(dentry); 197 dput(dentry);
198 } 198 }
199 up(&devpts_root->d_inode->i_sem); 199 mutex_unlock(&devpts_root->d_inode->i_mutex);
200} 200}
201 201
202static int __init init_devpts_fs(void) 202static int __init init_devpts_fs(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3931e7f1e6bf..30dbbd1df511 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -56,7 +56,7 @@
56 * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems. 56 * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
57 * This determines whether we need to do the fancy locking which prevents 57 * This determines whether we need to do the fancy locking which prevents
58 * direct-IO from being able to read uninitialised disk blocks. If its zero 58 * direct-IO from being able to read uninitialised disk blocks. If its zero
59 * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_sem is 59 * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
60 * not held for the entire direct write (taken briefly, initially, during a 60 * not held for the entire direct write (taken briefly, initially, during a
61 * direct read though, but its never held for the duration of a direct-IO). 61 * direct read though, but its never held for the duration of a direct-IO).
62 */ 62 */
@@ -930,7 +930,7 @@ out:
930} 930}
931 931
932/* 932/*
933 * Releases both i_sem and i_alloc_sem 933 * Releases both i_mutex and i_alloc_sem
934 */ 934 */
935static ssize_t 935static ssize_t
936direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 936direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
@@ -1062,11 +1062,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1062 1062
1063 /* 1063 /*
1064 * All block lookups have been performed. For READ requests 1064 * All block lookups have been performed. For READ requests
1065 * we can let i_sem go now that its achieved its purpose 1065 * we can let i_mutex go now that its achieved its purpose
1066 * of protecting us from looking up uninitialized blocks. 1066 * of protecting us from looking up uninitialized blocks.
1067 */ 1067 */
1068 if ((rw == READ) && (dio->lock_type == DIO_LOCKING)) 1068 if ((rw == READ) && (dio->lock_type == DIO_LOCKING))
1069 up(&dio->inode->i_sem); 1069 mutex_unlock(&dio->inode->i_mutex);
1070 1070
1071 /* 1071 /*
1072 * OK, all BIOs are submitted, so we can decrement bio_count to truly 1072 * OK, all BIOs are submitted, so we can decrement bio_count to truly
@@ -1145,18 +1145,18 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1145 * The locking rules are governed by the dio_lock_type parameter. 1145 * The locking rules are governed by the dio_lock_type parameter.
1146 * 1146 *
1147 * DIO_NO_LOCKING (no locking, for raw block device access) 1147 * DIO_NO_LOCKING (no locking, for raw block device access)
1148 * For writes, i_sem is not held on entry; it is never taken. 1148 * For writes, i_mutex is not held on entry; it is never taken.
1149 * 1149 *
1150 * DIO_LOCKING (simple locking for regular files) 1150 * DIO_LOCKING (simple locking for regular files)
1151 * For writes we are called under i_sem and return with i_sem held, even though 1151 * For writes we are called under i_mutex and return with i_mutex held, even though
1152 * it is internally dropped. 1152 * it is internally dropped.
1153 * For reads, i_sem is not held on entry, but it is taken and dropped before 1153 * For reads, i_mutex is not held on entry, but it is taken and dropped before
1154 * returning. 1154 * returning.
1155 * 1155 *
1156 * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of 1156 * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
1157 * uninitialised data, allowing parallel direct readers and writers) 1157 * uninitialised data, allowing parallel direct readers and writers)
1158 * For writes we are called without i_sem, return without it, never touch it. 1158 * For writes we are called without i_mutex, return without it, never touch it.
1159 * For reads, i_sem is held on entry and will be released before returning. 1159 * For reads, i_mutex is held on entry and will be released before returning.
1160 * 1160 *
1161 * Additional i_alloc_sem locking requirements described inline below. 1161 * Additional i_alloc_sem locking requirements described inline below.
1162 */ 1162 */
@@ -1214,11 +1214,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1214 * For block device access DIO_NO_LOCKING is used, 1214 * For block device access DIO_NO_LOCKING is used,
1215 * neither readers nor writers do any locking at all 1215 * neither readers nor writers do any locking at all
1216 * For regular files using DIO_LOCKING, 1216 * For regular files using DIO_LOCKING,
1217 * readers need to grab i_sem and i_alloc_sem 1217 * readers need to grab i_mutex and i_alloc_sem
1218 * writers need to grab i_alloc_sem only (i_sem is already held) 1218 * writers need to grab i_alloc_sem only (i_mutex is already held)
1219 * For regular files using DIO_OWN_LOCKING, 1219 * For regular files using DIO_OWN_LOCKING,
1220 * neither readers nor writers take any locks here 1220 * neither readers nor writers take any locks here
1221 * (i_sem is already held and release for writers here) 1221 * (i_mutex is already held and release for writers here)
1222 */ 1222 */
1223 dio->lock_type = dio_lock_type; 1223 dio->lock_type = dio_lock_type;
1224 if (dio_lock_type != DIO_NO_LOCKING) { 1224 if (dio_lock_type != DIO_NO_LOCKING) {
@@ -1228,7 +1228,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1228 1228
1229 mapping = iocb->ki_filp->f_mapping; 1229 mapping = iocb->ki_filp->f_mapping;
1230 if (dio_lock_type != DIO_OWN_LOCKING) { 1230 if (dio_lock_type != DIO_OWN_LOCKING) {
1231 down(&inode->i_sem); 1231 mutex_lock(&inode->i_mutex);
1232 reader_with_isem = 1; 1232 reader_with_isem = 1;
1233 } 1233 }
1234 1234
@@ -1240,7 +1240,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1240 } 1240 }
1241 1241
1242 if (dio_lock_type == DIO_OWN_LOCKING) { 1242 if (dio_lock_type == DIO_OWN_LOCKING) {
1243 up(&inode->i_sem); 1243 mutex_unlock(&inode->i_mutex);
1244 reader_with_isem = 0; 1244 reader_with_isem = 0;
1245 } 1245 }
1246 } 1246 }
@@ -1266,7 +1266,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1266 1266
1267out: 1267out:
1268 if (reader_with_isem) 1268 if (reader_with_isem)
1269 up(&inode->i_sem); 1269 mutex_unlock(&inode->i_mutex);
1270 if (rw & WRITE) 1270 if (rw & WRITE)
1271 current->flags &= ~PF_SYNCWRITE; 1271 current->flags &= ~PF_SYNCWRITE;
1272 return retval; 1272 return retval;
diff --git a/fs/dquot.c b/fs/dquot.c
index 2a62b3dc20ec..1966c890b48d 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -77,6 +77,7 @@
77#include <linux/kmod.h> 77#include <linux/kmod.h>
78#include <linux/namei.h> 78#include <linux/namei.h>
79#include <linux/buffer_head.h> 79#include <linux/buffer_head.h>
80#include <linux/capability.h>
80#include <linux/quotaops.h> 81#include <linux/quotaops.h>
81 82
82#include <asm/uaccess.h> 83#include <asm/uaccess.h>
@@ -100,7 +101,7 @@
100 * operation is just reading pointers from inode (or not using them at all) the 101 * operation is just reading pointers from inode (or not using them at all) the
101 * read lock is enough. If pointers are altered function must hold write lock 102 * read lock is enough. If pointers are altered function must hold write lock
102 * (these locking rules also apply for S_NOQUOTA flag in the inode - note that 103 * (these locking rules also apply for S_NOQUOTA flag in the inode - note that
103 * for altering the flag i_sem is also needed). If operation is holding 104 * for altering the flag i_mutex is also needed). If operation is holding
104 * reference to dquot in other way (e.g. quotactl ops) it must be guarded by 105 * reference to dquot in other way (e.g. quotactl ops) it must be guarded by
105 * dqonoff_sem. 106 * dqonoff_sem.
106 * This locking assures that: 107 * This locking assures that:
@@ -117,9 +118,9 @@
117 * spinlock to internal buffers before writing. 118 * spinlock to internal buffers before writing.
118 * 119 *
119 * Lock ordering (including related VFS locks) is the following: 120 * Lock ordering (including related VFS locks) is the following:
120 * i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > 121 * i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
121 * > dquot->dq_lock > dqio_sem 122 * > dquot->dq_lock > dqio_sem
122 * i_sem on quota files is special (it's below dqio_sem) 123 * i_mutex on quota files is special (it's below dqio_sem)
123 */ 124 */
124 125
125static DEFINE_SPINLOCK(dq_list_lock); 126static DEFINE_SPINLOCK(dq_list_lock);
@@ -1369,11 +1370,11 @@ int vfs_quota_off(struct super_block *sb, int type)
1369 /* If quota was reenabled in the meantime, we have 1370 /* If quota was reenabled in the meantime, we have
1370 * nothing to do */ 1371 * nothing to do */
1371 if (!sb_has_quota_enabled(sb, cnt)) { 1372 if (!sb_has_quota_enabled(sb, cnt)) {
1372 down(&toputinode[cnt]->i_sem); 1373 mutex_lock(&toputinode[cnt]->i_mutex);
1373 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | 1374 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
1374 S_NOATIME | S_NOQUOTA); 1375 S_NOATIME | S_NOQUOTA);
1375 truncate_inode_pages(&toputinode[cnt]->i_data, 0); 1376 truncate_inode_pages(&toputinode[cnt]->i_data, 0);
1376 up(&toputinode[cnt]->i_sem); 1377 mutex_unlock(&toputinode[cnt]->i_mutex);
1377 mark_inode_dirty(toputinode[cnt]); 1378 mark_inode_dirty(toputinode[cnt]);
1378 iput(toputinode[cnt]); 1379 iput(toputinode[cnt]);
1379 } 1380 }
@@ -1417,7 +1418,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
1417 write_inode_now(inode, 1); 1418 write_inode_now(inode, 1);
1418 /* And now flush the block cache so that kernel sees the changes */ 1419 /* And now flush the block cache so that kernel sees the changes */
1419 invalidate_bdev(sb->s_bdev, 0); 1420 invalidate_bdev(sb->s_bdev, 0);
1420 down(&inode->i_sem); 1421 mutex_lock(&inode->i_mutex);
1421 down(&dqopt->dqonoff_sem); 1422 down(&dqopt->dqonoff_sem);
1422 if (sb_has_quota_enabled(sb, type)) { 1423 if (sb_has_quota_enabled(sb, type)) {
1423 error = -EBUSY; 1424 error = -EBUSY;
@@ -1449,7 +1450,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
1449 goto out_file_init; 1450 goto out_file_init;
1450 } 1451 }
1451 up(&dqopt->dqio_sem); 1452 up(&dqopt->dqio_sem);
1452 up(&inode->i_sem); 1453 mutex_unlock(&inode->i_mutex);
1453 set_enable_flags(dqopt, type); 1454 set_enable_flags(dqopt, type);
1454 1455
1455 add_dquot_ref(sb, type); 1456 add_dquot_ref(sb, type);
@@ -1470,7 +1471,7 @@ out_lock:
1470 inode->i_flags |= oldflags; 1471 inode->i_flags |= oldflags;
1471 up_write(&dqopt->dqptr_sem); 1472 up_write(&dqopt->dqptr_sem);
1472 } 1473 }
1473 up(&inode->i_sem); 1474 mutex_unlock(&inode->i_mutex);
1474out_fmt: 1475out_fmt:
1475 put_quota_format(fmt); 1476 put_quota_format(fmt);
1476 1477
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
new file mode 100644
index 000000000000..4e4762389bdc
--- /dev/null
+++ b/fs/drop_caches.c
@@ -0,0 +1,68 @@
1/*
2 * Implement the manual drop-all-pagecache function
3 */
4
5#include <linux/kernel.h>
6#include <linux/mm.h>
7#include <linux/fs.h>
8#include <linux/writeback.h>
9#include <linux/sysctl.h>
10#include <linux/gfp.h>
11
12/* A global variable is a bit ugly, but it keeps the code simple */
13int sysctl_drop_caches;
14
15static void drop_pagecache_sb(struct super_block *sb)
16{
17 struct inode *inode;
18
19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue;
23 invalidate_inode_pages(inode->i_mapping);
24 }
25 spin_unlock(&inode_lock);
26}
27
28void drop_pagecache(void)
29{
30 struct super_block *sb;
31
32 spin_lock(&sb_lock);
33restart:
34 list_for_each_entry(sb, &super_blocks, s_list) {
35 sb->s_count++;
36 spin_unlock(&sb_lock);
37 down_read(&sb->s_umount);
38 if (sb->s_root)
39 drop_pagecache_sb(sb);
40 up_read(&sb->s_umount);
41 spin_lock(&sb_lock);
42 if (__put_super_and_need_restart(sb))
43 goto restart;
44 }
45 spin_unlock(&sb_lock);
46}
47
48void drop_slab(void)
49{
50 int nr_objects;
51
52 do {
53 nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
54 } while (nr_objects > 10);
55}
56
57int drop_caches_sysctl_handler(ctl_table *table, int write,
58 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
59{
60 proc_dointvec_minmax(table, write, file, buffer, length, ppos);
61 if (write) {
62 if (sysctl_drop_caches & 1)
63 drop_pagecache();
64 if (sysctl_drop_caches & 2)
65 drop_slab();
66 }
67 return 0;
68}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index d8d5ea9a9997..afc4891feb36 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -222,12 +222,13 @@ static efs_block_t efs_validate_vh(struct volume_header *vh) {
222 sblock); 222 sblock);
223#endif 223#endif
224 } 224 }
225 return(sblock); 225 return sblock;
226} 226}
227 227
228static int efs_validate_super(struct efs_sb_info *sb, struct efs_super *super) { 228static int efs_validate_super(struct efs_sb_info *sb, struct efs_super *super) {
229 229
230 if (!IS_EFS_MAGIC(be32_to_cpu(super->fs_magic))) return -1; 230 if (!IS_EFS_MAGIC(be32_to_cpu(super->fs_magic)))
231 return -1;
231 232
232 sb->fs_magic = be32_to_cpu(super->fs_magic); 233 sb->fs_magic = be32_to_cpu(super->fs_magic);
233 sb->total_blocks = be32_to_cpu(super->fs_size); 234 sb->total_blocks = be32_to_cpu(super->fs_size);
diff --git a/fs/exec.c b/fs/exec.c
index e75a9548da8e..055378d2513e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -477,7 +477,7 @@ struct file *open_exec(const char *name)
477 int err; 477 int err;
478 struct file *file; 478 struct file *file;
479 479
480 err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); 480 err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ);
481 file = ERR_PTR(err); 481 file = ERR_PTR(err);
482 482
483 if (!err) { 483 if (!err) {
@@ -575,7 +575,7 @@ static int exec_mmap(struct mm_struct *mm)
575 * disturbing other processes. (Other processes might share the signal 575 * disturbing other processes. (Other processes might share the signal
576 * table via the CLONE_SIGHAND option to clone().) 576 * table via the CLONE_SIGHAND option to clone().)
577 */ 577 */
578static inline int de_thread(struct task_struct *tsk) 578static int de_thread(struct task_struct *tsk)
579{ 579{
580 struct signal_struct *sig = tsk->signal; 580 struct signal_struct *sig = tsk->signal;
581 struct sighand_struct *newsighand, *oldsighand = tsk->sighand; 581 struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
@@ -632,10 +632,10 @@ static inline int de_thread(struct task_struct *tsk)
632 * synchronize with any firing (by calling del_timer_sync) 632 * synchronize with any firing (by calling del_timer_sync)
633 * before we can safely let the old group leader die. 633 * before we can safely let the old group leader die.
634 */ 634 */
635 sig->real_timer.data = (unsigned long)current; 635 sig->real_timer.data = current;
636 spin_unlock_irq(lock); 636 spin_unlock_irq(lock);
637 if (del_timer_sync(&sig->real_timer)) 637 if (hrtimer_cancel(&sig->real_timer))
638 add_timer(&sig->real_timer); 638 hrtimer_restart(&sig->real_timer);
639 spin_lock_irq(lock); 639 spin_lock_irq(lock);
640 } 640 }
641 while (atomic_read(&sig->count) > count) { 641 while (atomic_read(&sig->count) > count) {
@@ -760,7 +760,7 @@ no_thread_group:
760 spin_lock(&oldsighand->siglock); 760 spin_lock(&oldsighand->siglock);
761 spin_lock(&newsighand->siglock); 761 spin_lock(&newsighand->siglock);
762 762
763 current->sighand = newsighand; 763 rcu_assign_pointer(current->sighand, newsighand);
764 recalc_sigpending(); 764 recalc_sigpending();
765 765
766 spin_unlock(&newsighand->siglock); 766 spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ no_thread_group:
768 write_unlock_irq(&tasklist_lock); 768 write_unlock_irq(&tasklist_lock);
769 769
770 if (atomic_dec_and_test(&oldsighand->count)) 770 if (atomic_dec_and_test(&oldsighand->count))
771 kmem_cache_free(sighand_cachep, oldsighand); 771 sighand_free(oldsighand);
772 } 772 }
773 773
774 BUG_ON(!thread_group_leader(current)); 774 BUG_ON(!thread_group_leader(current));
@@ -780,7 +780,7 @@ no_thread_group:
780 * so that a new one can be started 780 * so that a new one can be started
781 */ 781 */
782 782
783static inline void flush_old_files(struct files_struct * files) 783static void flush_old_files(struct files_struct * files)
784{ 784{
785 long j = -1; 785 long j = -1;
786 struct fdtable *fdt; 786 struct fdtable *fdt;
@@ -964,7 +964,7 @@ int prepare_binprm(struct linux_binprm *bprm)
964 964
965EXPORT_SYMBOL(prepare_binprm); 965EXPORT_SYMBOL(prepare_binprm);
966 966
967static inline int unsafe_exec(struct task_struct *p) 967static int unsafe_exec(struct task_struct *p)
968{ 968{
969 int unsafe = 0; 969 int unsafe = 0;
970 if (p->ptrace & PT_PTRACED) { 970 if (p->ptrace & PT_PTRACED) {
@@ -1462,6 +1462,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1462 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { 1462 if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
1463 current->signal->flags = SIGNAL_GROUP_EXIT; 1463 current->signal->flags = SIGNAL_GROUP_EXIT;
1464 current->signal->group_exit_code = exit_code; 1464 current->signal->group_exit_code = exit_code;
1465 current->signal->group_stop_count = 0;
1465 retval = 0; 1466 retval = 0;
1466 } 1467 }
1467 spin_unlock_irq(&current->sighand->siglock); 1468 spin_unlock_irq(&current->sighand->siglock);
@@ -1477,7 +1478,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1477 * Clear any false indication of pending signals that might 1478 * Clear any false indication of pending signals that might
1478 * be seen by the filesystem code called to write the core file. 1479 * be seen by the filesystem code called to write the core file.
1479 */ 1480 */
1480 current->signal->group_stop_count = 0;
1481 clear_thread_flag(TIF_SIGPENDING); 1481 clear_thread_flag(TIF_SIGPENDING);
1482 1482
1483 if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) 1483 if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
@@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1505 goto close_fail; 1505 goto close_fail;
1506 if (!file->f_op->write) 1506 if (!file->f_op->write)
1507 goto close_fail; 1507 goto close_fail;
1508 if (do_truncate(file->f_dentry, 0, file) != 0) 1508 if (do_truncate(file->f_dentry, 0, 0, file) != 0)
1509 goto close_fail; 1509 goto close_fail;
1510 1510
1511 retval = binfmt->core_dump(signr, regs, file); 1511 retval = binfmt->core_dump(signr, regs, file);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index c49d6254379a..b06b54f1bbbb 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -11,6 +11,33 @@ struct export_operations export_op_default;
11 11
12#define dprintk(fmt, args...) do{}while(0) 12#define dprintk(fmt, args...) do{}while(0)
13 13
14static struct dentry *
15find_acceptable_alias(struct dentry *result,
16 int (*acceptable)(void *context, struct dentry *dentry),
17 void *context)
18{
19 struct dentry *dentry, *toput = NULL;
20
21 spin_lock(&dcache_lock);
22 list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
23 dget_locked(dentry);
24 spin_unlock(&dcache_lock);
25 if (toput)
26 dput(toput);
27 if (dentry != result && acceptable(context, dentry)) {
28 dput(result);
29 return dentry;
30 }
31 spin_lock(&dcache_lock);
32 toput = dentry;
33 }
34 spin_unlock(&dcache_lock);
35
36 if (toput)
37 dput(toput);
38 return NULL;
39}
40
14/** 41/**
15 * find_exported_dentry - helper routine to implement export_operations->decode_fh 42 * find_exported_dentry - helper routine to implement export_operations->decode_fh
16 * @sb: The &super_block identifying the filesystem 43 * @sb: The &super_block identifying the filesystem
@@ -52,8 +79,7 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
52 struct dentry *target_dir; 79 struct dentry *target_dir;
53 int err; 80 int err;
54 struct export_operations *nops = sb->s_export_op; 81 struct export_operations *nops = sb->s_export_op;
55 struct list_head *le, *head; 82 struct dentry *alias;
56 struct dentry *toput = NULL;
57 int noprogress; 83 int noprogress;
58 char nbuf[NAME_MAX+1]; 84 char nbuf[NAME_MAX+1];
59 85
@@ -79,27 +105,10 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
79 /* there is no other dentry, so fail */ 105 /* there is no other dentry, so fail */
80 goto err_result; 106 goto err_result;
81 } 107 }
82 /* try any other aliases */ 108
83 spin_lock(&dcache_lock); 109 alias = find_acceptable_alias(result, acceptable, context);
84 head = &result->d_inode->i_dentry; 110 if (alias)
85 list_for_each(le, head) { 111 return alias;
86 struct dentry *dentry = list_entry(le, struct dentry, d_alias);
87 dget_locked(dentry);
88 spin_unlock(&dcache_lock);
89 if (toput)
90 dput(toput);
91 toput = NULL;
92 if (dentry != result &&
93 acceptable(context, dentry)) {
94 dput(result);
95 return dentry;
96 }
97 spin_lock(&dcache_lock);
98 toput = dentry;
99 }
100 spin_unlock(&dcache_lock);
101 if (toput)
102 dput(toput);
103 } 112 }
104 113
105 /* It's a directory, or we are required to confirm the file's 114 /* It's a directory, or we are required to confirm the file's
@@ -177,9 +186,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
177 struct dentry *ppd; 186 struct dentry *ppd;
178 struct dentry *npd; 187 struct dentry *npd;
179 188
180 down(&pd->d_inode->i_sem); 189 mutex_lock(&pd->d_inode->i_mutex);
181 ppd = CALL(nops,get_parent)(pd); 190 ppd = CALL(nops,get_parent)(pd);
182 up(&pd->d_inode->i_sem); 191 mutex_unlock(&pd->d_inode->i_mutex);
183 192
184 if (IS_ERR(ppd)) { 193 if (IS_ERR(ppd)) {
185 err = PTR_ERR(ppd); 194 err = PTR_ERR(ppd);
@@ -201,9 +210,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
201 break; 210 break;
202 } 211 }
203 dprintk("find_exported_dentry: found name: %s\n", nbuf); 212 dprintk("find_exported_dentry: found name: %s\n", nbuf);
204 down(&ppd->d_inode->i_sem); 213 mutex_lock(&ppd->d_inode->i_mutex);
205 npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); 214 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
206 up(&ppd->d_inode->i_sem); 215 mutex_unlock(&ppd->d_inode->i_mutex);
207 if (IS_ERR(npd)) { 216 if (IS_ERR(npd)) {
208 err = PTR_ERR(npd); 217 err = PTR_ERR(npd);
209 dprintk("find_exported_dentry: lookup failed: %d\n", err); 218 dprintk("find_exported_dentry: lookup failed: %d\n", err);
@@ -242,9 +251,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
242 struct dentry *nresult; 251 struct dentry *nresult;
243 err = CALL(nops,get_name)(target_dir, nbuf, result); 252 err = CALL(nops,get_name)(target_dir, nbuf, result);
244 if (!err) { 253 if (!err) {
245 down(&target_dir->d_inode->i_sem); 254 mutex_lock(&target_dir->d_inode->i_mutex);
246 nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); 255 nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
247 up(&target_dir->d_inode->i_sem); 256 mutex_unlock(&target_dir->d_inode->i_mutex);
248 if (!IS_ERR(nresult)) { 257 if (!IS_ERR(nresult)) {
249 if (nresult->d_inode) { 258 if (nresult->d_inode) {
250 dput(result); 259 dput(result);
@@ -258,26 +267,10 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent,
258 /* now result is properly connected, it is our best bet */ 267 /* now result is properly connected, it is our best bet */
259 if (acceptable(context, result)) 268 if (acceptable(context, result))
260 return result; 269 return result;
261 /* one last try of the aliases.. */ 270
262 spin_lock(&dcache_lock); 271 alias = find_acceptable_alias(result, acceptable, context);
263 toput = NULL; 272 if (alias)
264 head = &result->d_inode->i_dentry; 273 return alias;
265 list_for_each(le, head) {
266 struct dentry *dentry = list_entry(le, struct dentry, d_alias);
267 dget_locked(dentry);
268 spin_unlock(&dcache_lock);
269 if (toput) dput(toput);
270 if (dentry != result &&
271 acceptable(context, dentry)) {
272 dput(result);
273 return dentry;
274 }
275 spin_lock(&dcache_lock);
276 toput = dentry;
277 }
278 spin_unlock(&dcache_lock);
279 if (toput)
280 dput(toput);
281 274
282 /* drat - I just cannot find anything acceptable */ 275 /* drat - I just cannot find anything acceptable */
283 dput(result); 276 dput(result);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 6af2f4130290..35acc43b897f 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -4,6 +4,7 @@
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7#include <linux/capability.h>
7#include <linux/init.h> 8#include <linux/init.h>
8#include <linux/sched.h> 9#include <linux/sched.h>
9#include <linux/slab.h> 10#include <linux/slab.h>
@@ -149,7 +150,7 @@ ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
149} 150}
150 151
151/* 152/*
152 * inode->i_sem: don't care 153 * inode->i_mutex: don't care
153 */ 154 */
154static struct posix_acl * 155static struct posix_acl *
155ext2_get_acl(struct inode *inode, int type) 156ext2_get_acl(struct inode *inode, int type)
@@ -211,7 +212,7 @@ ext2_get_acl(struct inode *inode, int type)
211} 212}
212 213
213/* 214/*
214 * inode->i_sem: down 215 * inode->i_mutex: down
215 */ 216 */
216static int 217static int
217ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) 218ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
@@ -301,8 +302,8 @@ ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
301/* 302/*
302 * Initialize the ACLs of a new inode. Called from ext2_new_inode. 303 * Initialize the ACLs of a new inode. Called from ext2_new_inode.
303 * 304 *
304 * dir->i_sem: down 305 * dir->i_mutex: down
305 * inode->i_sem: up (access to inode is still exclusive) 306 * inode->i_mutex: up (access to inode is still exclusive)
306 */ 307 */
307int 308int
308ext2_init_acl(struct inode *inode, struct inode *dir) 309ext2_init_acl(struct inode *inode, struct inode *dir)
@@ -361,7 +362,7 @@ cleanup:
361 * for directories) are added. There are no more bits available in the 362 * for directories) are added. There are no more bits available in the
362 * file mode. 363 * file mode.
363 * 364 *
364 * inode->i_sem: down 365 * inode->i_mutex: down
365 */ 366 */
366int 367int
367ext2_acl_chmod(struct inode *inode) 368ext2_acl_chmod(struct inode *inode)
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index bb6908066494..2c00953d4b0b 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -16,6 +16,7 @@
16#include <linux/quotaops.h> 16#include <linux/quotaops.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
19#include <linux/capability.h>
19 20
20/* 21/*
21 * balloc.c contains the blocks allocation and deallocation routines 22 * balloc.c contains the blocks allocation and deallocation routines
diff --git a/fs/ext2/bitmap.c b/fs/ext2/bitmap.c
index 20145b74623f..e9983a0dd396 100644
--- a/fs/ext2/bitmap.c
+++ b/fs/ext2/bitmap.c
@@ -7,8 +7,12 @@
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 */ 8 */
9 9
10#ifdef EXT2FS_DEBUG
11
10#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
11 13
14#include "ext2.h"
15
12static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 16static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
13 17
14unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) 18unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
@@ -23,3 +27,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
23 nibblemap[(map->b_data[i] >> 4) & 0xf]; 27 nibblemap[(map->b_data[i] >> 4) & 0xf];
24 return (sum); 28 return (sum);
25} 29}
30
31#endif /* EXT2FS_DEBUG */
32
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 5b5f52876b42..7442bdd1267a 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -592,7 +592,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
592 goto fail; 592 goto fail;
593 } 593 }
594 kaddr = kmap_atomic(page, KM_USER0); 594 kaddr = kmap_atomic(page, KM_USER0);
595 memset(kaddr, 0, chunk_size); 595 memset(kaddr, 0, chunk_size);
596 de = (struct ext2_dir_entry_2 *)kaddr; 596 de = (struct ext2_dir_entry_2 *)kaddr;
597 de->name_len = 1; 597 de->name_len = 1;
598 de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); 598 de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e977f8566d14..00de0a7312a2 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -53,7 +53,7 @@ struct ext2_inode_info {
53#ifdef CONFIG_EXT2_FS_XATTR 53#ifdef CONFIG_EXT2_FS_XATTR
54 /* 54 /*
55 * Extended attributes can be read independently of the main file 55 * Extended attributes can be read independently of the main file
56 * data. Taking i_sem even when reading would cause contention 56 * data. Taking i_mutex even when reading would cause contention
57 * between readers of EAs and writers of regular file data, so 57 * between readers of EAs and writers of regular file data, so
58 * instead we synchronize on xattr_sem when reading or changing 58 * instead we synchronize on xattr_sem when reading or changing
59 * EAs. 59 * EAs.
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 709d8676b962..3ca9afdf713d 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -8,6 +8,7 @@
8 */ 8 */
9 9
10#include "ext2.h" 10#include "ext2.h"
11#include <linux/capability.h>
11#include <linux/time.h> 12#include <linux/time.h>
12#include <linux/sched.h> 13#include <linux/sched.h>
13#include <asm/current.h> 14#include <asm/current.h>
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index c5513953c825..ad1432a2a62e 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -83,10 +83,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
83 if (!inode) 83 if (!inode)
84 return ERR_PTR(-EACCES); 84 return ERR_PTR(-EACCES);
85 } 85 }
86 if (inode) 86 return d_splice_alias(inode, dentry);
87 return d_splice_alias(inode, dentry);
88 d_add(dentry, inode);
89 return NULL;
90} 87}
91 88
92struct dentry *ext2_get_parent(struct dentry *child) 89struct dentry *ext2_get_parent(struct dentry *child)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 522fa70dd8ea..8d6819846fc9 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1152,7 +1152,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1152 struct buffer_head tmp_bh; 1152 struct buffer_head tmp_bh;
1153 struct buffer_head *bh; 1153 struct buffer_head *bh;
1154 1154
1155 down(&inode->i_sem); 1155 mutex_lock(&inode->i_mutex);
1156 while (towrite > 0) { 1156 while (towrite > 0) {
1157 tocopy = sb->s_blocksize - offset < towrite ? 1157 tocopy = sb->s_blocksize - offset < towrite ?
1158 sb->s_blocksize - offset : towrite; 1158 sb->s_blocksize - offset : towrite;
@@ -1189,7 +1189,7 @@ out:
1189 inode->i_version++; 1189 inode->i_version++;
1190 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1190 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1191 mark_inode_dirty(inode); 1191 mark_inode_dirty(inode);
1192 up(&inode->i_sem); 1192 mutex_unlock(&inode->i_mutex);
1193 return len - towrite; 1193 return len - towrite;
1194} 1194}
1195 1195
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 0099462d4271..a2ca3107d475 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -325,7 +325,7 @@ cleanup:
325/* 325/*
326 * Inode operation listxattr() 326 * Inode operation listxattr()
327 * 327 *
328 * dentry->d_inode->i_sem: don't care 328 * dentry->d_inode->i_mutex: don't care
329 */ 329 */
330ssize_t 330ssize_t
331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) 331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -389,10 +389,6 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
389 ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", 389 ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
390 name_index, name, value, (long)value_len); 390 name_index, name, value, (long)value_len);
391 391
392 if (IS_RDONLY(inode))
393 return -EROFS;
394 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
395 return -EPERM;
396 if (value == NULL) 392 if (value == NULL)
397 value_len = 0; 393 value_len = 0;
398 if (name == NULL) 394 if (name == NULL)
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 52b30ee6a25f..f28a6a499c96 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h>
10#include <linux/fs.h> 11#include <linux/fs.h>
11#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
12#include <linux/ext2_fs.h> 13#include <linux/ext2_fs.h>
@@ -38,8 +39,6 @@ ext2_xattr_trusted_get(struct inode *inode, const char *name,
38{ 39{
39 if (strcmp(name, "") == 0) 40 if (strcmp(name, "") == 0)
40 return -EINVAL; 41 return -EINVAL;
41 if (!capable(CAP_SYS_ADMIN))
42 return -EPERM;
43 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, 42 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
44 buffer, size); 43 buffer, size);
45} 44}
@@ -50,8 +49,6 @@ ext2_xattr_trusted_set(struct inode *inode, const char *name,
50{ 49{
51 if (strcmp(name, "") == 0) 50 if (strcmp(name, "") == 0)
52 return -EINVAL; 51 return -EINVAL;
53 if (!capable(CAP_SYS_ADMIN))
54 return -EPERM;
55 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, 52 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
56 value, size, flags); 53 value, size, flags);
57} 54}
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 0c03ea131a94..f383e7c3a7b5 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -35,16 +35,10 @@ static int
35ext2_xattr_user_get(struct inode *inode, const char *name, 35ext2_xattr_user_get(struct inode *inode, const char *name,
36 void *buffer, size_t size) 36 void *buffer, size_t size)
37{ 37{
38 int error;
39
40 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
41 return -EINVAL; 39 return -EINVAL;
42 if (!test_opt(inode->i_sb, XATTR_USER)) 40 if (!test_opt(inode->i_sb, XATTR_USER))
43 return -EOPNOTSUPP; 41 return -EOPNOTSUPP;
44 error = permission(inode, MAY_READ, NULL);
45 if (error)
46 return error;
47
48 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); 42 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
49} 43}
50 44
@@ -52,18 +46,10 @@ static int
52ext2_xattr_user_set(struct inode *inode, const char *name, 46ext2_xattr_user_set(struct inode *inode, const char *name,
53 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags)
54{ 48{
55 int error;
56
57 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
58 return -EINVAL; 50 return -EINVAL;
59 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(inode->i_sb, XATTR_USER))
60 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
61 if ( !S_ISREG(inode->i_mode) &&
62 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
63 return -EPERM;
64 error = permission(inode, MAY_WRITE, NULL);
65 if (error)
66 return error;
67 53
68 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, 54 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
69 value, size, flags); 55 value, size, flags);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 3ac38266fc9e..47a9da2dfb4f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/capability.h>
10#include <linux/fs.h> 11#include <linux/fs.h>
11#include <linux/ext3_jbd.h> 12#include <linux/ext3_jbd.h>
12#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
@@ -152,7 +153,7 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
152/* 153/*
153 * Inode operation get_posix_acl(). 154 * Inode operation get_posix_acl().
154 * 155 *
155 * inode->i_sem: don't care 156 * inode->i_mutex: don't care
156 */ 157 */
157static struct posix_acl * 158static struct posix_acl *
158ext3_get_acl(struct inode *inode, int type) 159ext3_get_acl(struct inode *inode, int type)
@@ -216,7 +217,7 @@ ext3_get_acl(struct inode *inode, int type)
216/* 217/*
217 * Set the access or default ACL of an inode. 218 * Set the access or default ACL of an inode.
218 * 219 *
219 * inode->i_sem: down unless called from ext3_new_inode 220 * inode->i_mutex: down unless called from ext3_new_inode
220 */ 221 */
221static int 222static int
222ext3_set_acl(handle_t *handle, struct inode *inode, int type, 223ext3_set_acl(handle_t *handle, struct inode *inode, int type,
@@ -306,8 +307,8 @@ ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
306/* 307/*
307 * Initialize the ACLs of a new inode. Called from ext3_new_inode. 308 * Initialize the ACLs of a new inode. Called from ext3_new_inode.
308 * 309 *
309 * dir->i_sem: down 310 * dir->i_mutex: down
310 * inode->i_sem: up (access to inode is still exclusive) 311 * inode->i_mutex: up (access to inode is still exclusive)
311 */ 312 */
312int 313int
313ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) 314ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
@@ -368,7 +369,7 @@ cleanup:
368 * for directories) are added. There are no more bits available in the 369 * for directories) are added. There are no more bits available in the
369 * file mode. 370 * file mode.
370 * 371 *
371 * inode->i_sem: down 372 * inode->i_mutex: down
372 */ 373 */
373int 374int
374ext3_acl_chmod(struct inode *inode) 375ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index ae1148c24c53..6250fcdf14a1 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/config.h> 14#include <linux/config.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/capability.h>
16#include <linux/fs.h> 17#include <linux/fs.h>
17#include <linux/jbd.h> 18#include <linux/jbd.h>
18#include <linux/ext3_fs.h> 19#include <linux/ext3_fs.h>
@@ -20,8 +21,6 @@
20#include <linux/quotaops.h> 21#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
22 23
23#include "bitmap.h"
24
25/* 24/*
26 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
27 */ 26 */
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index 5b4ba3e246e6..cb16b4c5d5df 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,8 +7,11 @@
7 * Universite Pierre et Marie Curie (Paris VI) 7 * Universite Pierre et Marie Curie (Paris VI)
8 */ 8 */
9 9
10#ifdef EXT3FS_DEBUG
11
10#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
11#include "bitmap.h" 13
14#include "ext3_fs.h"
12 15
13static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 16static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
14 17
@@ -24,3 +27,6 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
24 nibblemap[(map->b_data[i] >> 4) & 0xf]; 27 nibblemap[(map->b_data[i] >> 4) & 0xf];
25 return (sum); 28 return (sum);
26} 29}
30
31#endif /* EXT3FS_DEBUG */
32
diff --git a/fs/ext3/bitmap.h b/fs/ext3/bitmap.h
deleted file mode 100644
index 6ee503a6bb4e..000000000000
--- a/fs/ext3/bitmap.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/* linux/fs/ext3/bitmap.c
2 *
3 * Copyright (C) 2005 Simtec Electronics
4 * Ben Dooks <ben@simtec.co.uk>
5 *
6*/
7
8extern unsigned long ext3_count_free (struct buffer_head *, unsigned int );
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9e4a24376210..dc826464f313 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -26,7 +26,6 @@
26 26
27#include <asm/byteorder.h> 27#include <asm/byteorder.h>
28 28
29#include "bitmap.h"
30#include "xattr.h" 29#include "xattr.h"
31#include "acl.h" 30#include "acl.h"
32 31
@@ -651,7 +650,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
651 /* Error cases - e2fsck has already cleaned up for us */ 650 /* Error cases - e2fsck has already cleaned up for us */
652 if (ino > max_ino) { 651 if (ino > max_ino) {
653 ext3_warning(sb, __FUNCTION__, 652 ext3_warning(sb, __FUNCTION__,
654 "bad orphan ino %lu! e2fsck was run?\n", ino); 653 "bad orphan ino %lu! e2fsck was run?", ino);
655 goto out; 654 goto out;
656 } 655 }
657 656
@@ -660,7 +659,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
660 bitmap_bh = read_inode_bitmap(sb, block_group); 659 bitmap_bh = read_inode_bitmap(sb, block_group);
661 if (!bitmap_bh) { 660 if (!bitmap_bh) {
662 ext3_warning(sb, __FUNCTION__, 661 ext3_warning(sb, __FUNCTION__,
663 "inode bitmap error for orphan %lu\n", ino); 662 "inode bitmap error for orphan %lu", ino);
664 goto out; 663 goto out;
665 } 664 }
666 665
@@ -672,7 +671,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
672 !(inode = iget(sb, ino)) || is_bad_inode(inode) || 671 !(inode = iget(sb, ino)) || is_bad_inode(inode) ||
673 NEXT_ORPHAN(inode) > max_ino) { 672 NEXT_ORPHAN(inode) > max_ino) {
674 ext3_warning(sb, __FUNCTION__, 673 ext3_warning(sb, __FUNCTION__,
675 "bad orphan inode %lu! e2fsck was run?\n", ino); 674 "bad orphan inode %lu! e2fsck was run?", ino);
676 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", 675 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
677 bit, (unsigned long long)bitmap_bh->b_blocknr, 676 bit, (unsigned long long)bitmap_bh->b_blocknr,
678 ext3_test_bit(bit, bitmap_bh->b_data)); 677 ext3_test_bit(bit, bitmap_bh->b_data));
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 706d68608381..556cd5510078 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd.h> 11#include <linux/jbd.h>
12#include <linux/capability.h>
12#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
13#include <linux/ext3_jbd.h> 14#include <linux/ext3_jbd.h>
14#include <linux/time.h> 15#include <linux/time.h>
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b3c690a3b54a..8bd8ac077704 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1005,10 +1005,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1005 if (!inode) 1005 if (!inode)
1006 return ERR_PTR(-EACCES); 1006 return ERR_PTR(-EACCES);
1007 } 1007 }
1008 if (inode) 1008 return d_splice_alias(inode, dentry);
1009 return d_splice_alias(inode, dentry);
1010 d_add(dentry, inode);
1011 return NULL;
1012} 1009}
1013 1010
1014 1011
@@ -1476,7 +1473,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1476 if (levels && (dx_get_count(frames->entries) == 1473 if (levels && (dx_get_count(frames->entries) ==
1477 dx_get_limit(frames->entries))) { 1474 dx_get_limit(frames->entries))) {
1478 ext3_warning(sb, __FUNCTION__, 1475 ext3_warning(sb, __FUNCTION__,
1479 "Directory index full!\n"); 1476 "Directory index full!");
1480 err = -ENOSPC; 1477 err = -ENOSPC;
1481 goto cleanup; 1478 goto cleanup;
1482 } 1479 }
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 6104ad310507..1041dab6de2f 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -31,7 +31,7 @@ static int verify_group_input(struct super_block *sb,
31 unsigned start = le32_to_cpu(es->s_blocks_count); 31 unsigned start = le32_to_cpu(es->s_blocks_count);
32 unsigned end = start + input->blocks_count; 32 unsigned end = start + input->blocks_count;
33 unsigned group = input->group; 33 unsigned group = input->group;
34 unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group; 34 unsigned itend = input->inode_table + sbi->s_itb_per_group;
35 unsigned overhead = ext3_bg_has_super(sb, group) ? 35 unsigned overhead = ext3_bg_has_super(sb, group) ?
36 (1 + ext3_bg_num_gdb(sb, group) + 36 (1 + ext3_bg_num_gdb(sb, group) +
37 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 37 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
@@ -340,7 +340,7 @@ static int verify_reserved_gdb(struct super_block *sb,
340 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { 340 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
341 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ 341 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
342 ext3_warning(sb, __FUNCTION__, 342 ext3_warning(sb, __FUNCTION__,
343 "reserved GDT %ld missing grp %d (%ld)\n", 343 "reserved GDT %ld missing grp %d (%ld)",
344 blk, grp, 344 blk, grp,
345 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); 345 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
346 return -EINVAL; 346 return -EINVAL;
@@ -393,7 +393,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
393 if (EXT3_SB(sb)->s_sbh->b_blocknr != 393 if (EXT3_SB(sb)->s_sbh->b_blocknr !=
394 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { 394 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
395 ext3_warning(sb, __FUNCTION__, 395 ext3_warning(sb, __FUNCTION__,
396 "won't resize using backup superblock at %llu\n", 396 "won't resize using backup superblock at %llu",
397 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); 397 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
398 return -EPERM; 398 return -EPERM;
399 } 399 }
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
417 data = (__u32 *)dind->b_data; 417 data = (__u32 *)dind->b_data;
418 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { 418 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
419 ext3_warning(sb, __FUNCTION__, 419 ext3_warning(sb, __FUNCTION__,
420 "new group %u GDT block %lu not reserved\n", 420 "new group %u GDT block %lu not reserved",
421 input->group, gdblock); 421 input->group, gdblock);
422 err = -EINVAL; 422 err = -EINVAL;
423 goto exit_dind; 423 goto exit_dind;
@@ -540,7 +540,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
540 for (res = 0; res < reserved_gdb; res++, blk++) { 540 for (res = 0; res < reserved_gdb; res++, blk++) {
541 if (le32_to_cpu(*data) != blk) { 541 if (le32_to_cpu(*data) != blk) {
542 ext3_warning(sb, __FUNCTION__, 542 ext3_warning(sb, __FUNCTION__,
543 "reserved block %lu not at offset %ld\n", 543 "reserved block %lu not at offset %ld",
544 blk, (long)(data - (__u32 *)dind->b_data)); 544 blk, (long)(data - (__u32 *)dind->b_data));
545 err = -EINVAL; 545 err = -EINVAL;
546 goto exit_bh; 546 goto exit_bh;
@@ -683,7 +683,7 @@ exit_err:
683 if (err) { 683 if (err) {
684 ext3_warning(sb, __FUNCTION__, 684 ext3_warning(sb, __FUNCTION__,
685 "can't update backup for group %d (err %d), " 685 "can't update backup for group %d (err %d), "
686 "forcing fsck on next reboot\n", group, err); 686 "forcing fsck on next reboot", group, err);
687 sbi->s_mount_state &= ~EXT3_VALID_FS; 687 sbi->s_mount_state &= ~EXT3_VALID_FS;
688 sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); 688 sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
689 mark_buffer_dirty(sbi->s_sbh); 689 mark_buffer_dirty(sbi->s_sbh);
@@ -722,7 +722,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
722 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, 722 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
723 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 723 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
724 ext3_warning(sb, __FUNCTION__, 724 ext3_warning(sb, __FUNCTION__,
725 "Can't resize non-sparse filesystem further\n"); 725 "Can't resize non-sparse filesystem further");
726 return -EPERM; 726 return -EPERM;
727 } 727 }
728 728
@@ -730,13 +730,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
730 if (!EXT3_HAS_COMPAT_FEATURE(sb, 730 if (!EXT3_HAS_COMPAT_FEATURE(sb,
731 EXT3_FEATURE_COMPAT_RESIZE_INODE)){ 731 EXT3_FEATURE_COMPAT_RESIZE_INODE)){
732 ext3_warning(sb, __FUNCTION__, 732 ext3_warning(sb, __FUNCTION__,
733 "No reserved GDT blocks, can't resize\n"); 733 "No reserved GDT blocks, can't resize");
734 return -EPERM; 734 return -EPERM;
735 } 735 }
736 inode = iget(sb, EXT3_RESIZE_INO); 736 inode = iget(sb, EXT3_RESIZE_INO);
737 if (!inode || is_bad_inode(inode)) { 737 if (!inode || is_bad_inode(inode)) {
738 ext3_warning(sb, __FUNCTION__, 738 ext3_warning(sb, __FUNCTION__,
739 "Error opening resize inode\n"); 739 "Error opening resize inode");
740 iput(inode); 740 iput(inode);
741 return -ENOENT; 741 return -ENOENT;
742 } 742 }
@@ -764,9 +764,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
764 } 764 }
765 765
766 lock_super(sb); 766 lock_super(sb);
767 if (input->group != EXT3_SB(sb)->s_groups_count) { 767 if (input->group != sbi->s_groups_count) {
768 ext3_warning(sb, __FUNCTION__, 768 ext3_warning(sb, __FUNCTION__,
769 "multiple resizers run on filesystem!\n"); 769 "multiple resizers run on filesystem!");
770 err = -EBUSY; 770 err = -EBUSY;
771 goto exit_journal; 771 goto exit_journal;
772 } 772 }
@@ -799,7 +799,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
799 * data. So we need to be careful to set all of the relevant 799 * data. So we need to be careful to set all of the relevant
800 * group descriptor data etc. *before* we enable the group. 800 * group descriptor data etc. *before* we enable the group.
801 * 801 *
802 * The key field here is EXT3_SB(sb)->s_groups_count: as long as 802 * The key field here is sbi->s_groups_count: as long as
803 * that retains its old value, nobody is going to access the new 803 * that retains its old value, nobody is going to access the new
804 * group. 804 * group.
805 * 805 *
@@ -859,7 +859,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
859 smp_wmb(); 859 smp_wmb();
860 860
861 /* Update the global fs size fields */ 861 /* Update the global fs size fields */
862 EXT3_SB(sb)->s_groups_count++; 862 sbi->s_groups_count++;
863 863
864 ext3_journal_dirty_metadata(handle, primary); 864 ext3_journal_dirty_metadata(handle, primary);
865 865
@@ -874,7 +874,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
874 percpu_counter_mod(&sbi->s_freeinodes_counter, 874 percpu_counter_mod(&sbi->s_freeinodes_counter,
875 EXT3_INODES_PER_GROUP(sb)); 875 EXT3_INODES_PER_GROUP(sb));
876 876
877 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 877 ext3_journal_dirty_metadata(handle, sbi->s_sbh);
878 sb->s_dirt = 1; 878 sb->s_dirt = 1;
879 879
880exit_journal: 880exit_journal:
@@ -937,7 +937,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
937 937
938 if (last == 0) { 938 if (last == 0) {
939 ext3_warning(sb, __FUNCTION__, 939 ext3_warning(sb, __FUNCTION__,
940 "need to use ext2online to resize further\n"); 940 "need to use ext2online to resize further");
941 return -EPERM; 941 return -EPERM;
942 } 942 }
943 943
@@ -973,7 +973,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
973 lock_super(sb); 973 lock_super(sb);
974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
975 ext3_warning(sb, __FUNCTION__, 975 ext3_warning(sb, __FUNCTION__,
976 "multiple resizers run on filesystem!\n"); 976 "multiple resizers run on filesystem!");
977 err = -EBUSY; 977 err = -EBUSY;
978 goto exit_put; 978 goto exit_put;
979 } 979 }
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 4e6730622d90..56bf76586019 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -43,7 +43,8 @@
43#include "acl.h" 43#include "acl.h"
44#include "namei.h" 44#include "namei.h"
45 45
46static int ext3_load_journal(struct super_block *, struct ext3_super_block *); 46static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
47 unsigned long journal_devnum);
47static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 48static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
48 int); 49 int);
49static void ext3_commit_super (struct super_block * sb, 50static void ext3_commit_super (struct super_block * sb,
@@ -628,7 +629,7 @@ enum {
628 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, 629 Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
629 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 630 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
630 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, 631 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
631 Opt_commit, Opt_journal_update, Opt_journal_inum, 632 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
632 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 633 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
633 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 634 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
634 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 635 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
@@ -666,6 +667,7 @@ static match_table_t tokens = {
666 {Opt_commit, "commit=%u"}, 667 {Opt_commit, "commit=%u"},
667 {Opt_journal_update, "journal=update"}, 668 {Opt_journal_update, "journal=update"},
668 {Opt_journal_inum, "journal=%u"}, 669 {Opt_journal_inum, "journal=%u"},
670 {Opt_journal_dev, "journal_dev=%u"},
669 {Opt_abort, "abort"}, 671 {Opt_abort, "abort"},
670 {Opt_data_journal, "data=journal"}, 672 {Opt_data_journal, "data=journal"},
671 {Opt_data_ordered, "data=ordered"}, 673 {Opt_data_ordered, "data=ordered"},
@@ -705,8 +707,9 @@ static unsigned long get_sb_block(void **data)
705 return sb_block; 707 return sb_block;
706} 708}
707 709
708static int parse_options (char * options, struct super_block *sb, 710static int parse_options (char *options, struct super_block *sb,
709 unsigned long * inum, unsigned long *n_blocks_count, int is_remount) 711 unsigned long *inum, unsigned long *journal_devnum,
712 unsigned long *n_blocks_count, int is_remount)
710{ 713{
711 struct ext3_sb_info *sbi = EXT3_SB(sb); 714 struct ext3_sb_info *sbi = EXT3_SB(sb);
712 char * p; 715 char * p;
@@ -839,6 +842,16 @@ static int parse_options (char * options, struct super_block *sb,
839 return 0; 842 return 0;
840 *inum = option; 843 *inum = option;
841 break; 844 break;
845 case Opt_journal_dev:
846 if (is_remount) {
847 printk(KERN_ERR "EXT3-fs: cannot specify "
848 "journal on remount\n");
849 return 0;
850 }
851 if (match_int(&args[0], &option))
852 return 0;
853 *journal_devnum = option;
854 break;
842 case Opt_noload: 855 case Opt_noload:
843 set_opt (sbi->s_mount_opt, NOLOAD); 856 set_opt (sbi->s_mount_opt, NOLOAD);
844 break; 857 break;
@@ -1331,6 +1344,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1331 unsigned long logic_sb_block; 1344 unsigned long logic_sb_block;
1332 unsigned long offset = 0; 1345 unsigned long offset = 0;
1333 unsigned long journal_inum = 0; 1346 unsigned long journal_inum = 0;
1347 unsigned long journal_devnum = 0;
1334 unsigned long def_mount_opts; 1348 unsigned long def_mount_opts;
1335 struct inode *root; 1349 struct inode *root;
1336 int blocksize; 1350 int blocksize;
@@ -1411,7 +1425,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1411 1425
1412 set_opt(sbi->s_mount_opt, RESERVATION); 1426 set_opt(sbi->s_mount_opt, RESERVATION);
1413 1427
1414 if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) 1428 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1429 NULL, 0))
1415 goto failed_mount; 1430 goto failed_mount;
1416 1431
1417 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1432 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -1622,7 +1637,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1622 */ 1637 */
1623 if (!test_opt(sb, NOLOAD) && 1638 if (!test_opt(sb, NOLOAD) &&
1624 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1639 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1625 if (ext3_load_journal(sb, es)) 1640 if (ext3_load_journal(sb, es, journal_devnum))
1626 goto failed_mount2; 1641 goto failed_mount2;
1627 } else if (journal_inum) { 1642 } else if (journal_inum) {
1628 if (ext3_create_journal(sb, es, journal_inum)) 1643 if (ext3_create_journal(sb, es, journal_inum))
@@ -1902,15 +1917,24 @@ out_bdev:
1902 return NULL; 1917 return NULL;
1903} 1918}
1904 1919
1905static int ext3_load_journal(struct super_block * sb, 1920static int ext3_load_journal(struct super_block *sb,
1906 struct ext3_super_block * es) 1921 struct ext3_super_block *es,
1922 unsigned long journal_devnum)
1907{ 1923{
1908 journal_t *journal; 1924 journal_t *journal;
1909 int journal_inum = le32_to_cpu(es->s_journal_inum); 1925 int journal_inum = le32_to_cpu(es->s_journal_inum);
1910 dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 1926 dev_t journal_dev;
1911 int err = 0; 1927 int err = 0;
1912 int really_read_only; 1928 int really_read_only;
1913 1929
1930 if (journal_devnum &&
1931 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
1932 printk(KERN_INFO "EXT3-fs: external journal device major/minor "
1933 "numbers have changed\n");
1934 journal_dev = new_decode_dev(journal_devnum);
1935 } else
1936 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
1937
1914 really_read_only = bdev_read_only(sb->s_bdev); 1938 really_read_only = bdev_read_only(sb->s_bdev);
1915 1939
1916 /* 1940 /*
@@ -1969,6 +1993,16 @@ static int ext3_load_journal(struct super_block * sb,
1969 1993
1970 EXT3_SB(sb)->s_journal = journal; 1994 EXT3_SB(sb)->s_journal = journal;
1971 ext3_clear_journal_err(sb, es); 1995 ext3_clear_journal_err(sb, es);
1996
1997 if (journal_devnum &&
1998 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
1999 es->s_journal_dev = cpu_to_le32(journal_devnum);
2000 sb->s_dirt = 1;
2001
2002 /* Make sure we flush the recovery flag to disk. */
2003 ext3_commit_super(sb, es, 1);
2004 }
2005
1972 return 0; 2006 return 0;
1973} 2007}
1974 2008
@@ -2116,7 +2150,7 @@ int ext3_force_commit(struct super_block *sb)
2116 2150
2117static void ext3_write_super (struct super_block * sb) 2151static void ext3_write_super (struct super_block * sb)
2118{ 2152{
2119 if (down_trylock(&sb->s_lock) == 0) 2153 if (mutex_trylock(&sb->s_lock) != 0)
2120 BUG(); 2154 BUG();
2121 sb->s_dirt = 0; 2155 sb->s_dirt = 0;
2122} 2156}
@@ -2197,7 +2231,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2197 /* 2231 /*
2198 * Allow the "check" option to be passed as a remount option. 2232 * Allow the "check" option to be passed as a remount option.
2199 */ 2233 */
2200 if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) { 2234 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
2201 err = -EINVAL; 2235 err = -EINVAL;
2202 goto restore_opts; 2236 goto restore_opts;
2203 } 2237 }
@@ -2567,7 +2601,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
2567 struct buffer_head *bh; 2601 struct buffer_head *bh;
2568 handle_t *handle = journal_current_handle(); 2602 handle_t *handle = journal_current_handle();
2569 2603
2570 down(&inode->i_sem); 2604 mutex_lock(&inode->i_mutex);
2571 while (towrite > 0) { 2605 while (towrite > 0) {
2572 tocopy = sb->s_blocksize - offset < towrite ? 2606 tocopy = sb->s_blocksize - offset < towrite ?
2573 sb->s_blocksize - offset : towrite; 2607 sb->s_blocksize - offset : towrite;
@@ -2610,7 +2644,7 @@ out:
2610 inode->i_version++; 2644 inode->i_version++;
2611 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2645 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2612 ext3_mark_inode_dirty(handle, inode); 2646 ext3_mark_inode_dirty(handle, inode);
2613 up(&inode->i_sem); 2647 mutex_unlock(&inode->i_mutex);
2614 return len - towrite; 2648 return len - towrite;
2615} 2649}
2616 2650
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 430de9f63be3..e8d60bf6b7df 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -140,7 +140,7 @@ ext3_xattr_handler(int name_index)
140/* 140/*
141 * Inode operation listxattr() 141 * Inode operation listxattr()
142 * 142 *
143 * dentry->d_inode->i_sem: don't care 143 * dentry->d_inode->i_mutex: don't care
144 */ 144 */
145ssize_t 145ssize_t
146ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) 146ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -946,10 +946,6 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
946 }; 946 };
947 int error; 947 int error;
948 948
949 if (IS_RDONLY(inode))
950 return -EROFS;
951 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
952 return -EPERM;
953 if (!name) 949 if (!name)
954 return -EINVAL; 950 return -EINVAL;
955 if (strlen(name) > 255) 951 if (strlen(name) > 255)
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index f68bfd1cf519..86d91f1186dc 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h>
10#include <linux/fs.h> 11#include <linux/fs.h>
11#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
12#include <linux/ext3_jbd.h> 13#include <linux/ext3_jbd.h>
@@ -39,8 +40,6 @@ ext3_xattr_trusted_get(struct inode *inode, const char *name,
39{ 40{
40 if (strcmp(name, "") == 0) 41 if (strcmp(name, "") == 0)
41 return -EINVAL; 42 return -EINVAL;
42 if (!capable(CAP_SYS_ADMIN))
43 return -EPERM;
44 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, 43 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
45 buffer, size); 44 buffer, size);
46} 45}
@@ -51,8 +50,6 @@ ext3_xattr_trusted_set(struct inode *inode, const char *name,
51{ 50{
52 if (strcmp(name, "") == 0) 51 if (strcmp(name, "") == 0)
53 return -EINVAL; 52 return -EINVAL;
54 if (!capable(CAP_SYS_ADMIN))
55 return -EPERM;
56 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, 53 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
57 value, size, flags); 54 value, size, flags);
58} 55}
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index e907cae7a07c..a85a0a17c4fd 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -37,16 +37,10 @@ static int
37ext3_xattr_user_get(struct inode *inode, const char *name, 37ext3_xattr_user_get(struct inode *inode, const char *name,
38 void *buffer, size_t size) 38 void *buffer, size_t size)
39{ 39{
40 int error;
41
42 if (strcmp(name, "") == 0) 40 if (strcmp(name, "") == 0)
43 return -EINVAL; 41 return -EINVAL;
44 if (!test_opt(inode->i_sb, XATTR_USER)) 42 if (!test_opt(inode->i_sb, XATTR_USER))
45 return -EOPNOTSUPP; 43 return -EOPNOTSUPP;
46 error = permission(inode, MAY_READ, NULL);
47 if (error)
48 return error;
49
50 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); 44 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
51} 45}
52 46
@@ -54,19 +48,10 @@ static int
54ext3_xattr_user_set(struct inode *inode, const char *name, 48ext3_xattr_user_set(struct inode *inode, const char *name,
55 const void *value, size_t size, int flags) 49 const void *value, size_t size, int flags)
56{ 50{
57 int error;
58
59 if (strcmp(name, "") == 0) 51 if (strcmp(name, "") == 0)
60 return -EINVAL; 52 return -EINVAL;
61 if (!test_opt(inode->i_sb, XATTR_USER)) 53 if (!test_opt(inode->i_sb, XATTR_USER))
62 return -EOPNOTSUPP; 54 return -EOPNOTSUPP;
63 if ( !S_ISREG(inode->i_mode) &&
64 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
65 return -EPERM;
66 error = permission(inode, MAY_WRITE, NULL);
67 if (error)
68 return error;
69
70 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, 55 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
71 value, size, flags); 56 value, size, flags);
72} 57}
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 77c24fcf712a..1acc941245fb 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
295 return dclus; 295 return dclus;
296} 296}
297 297
298int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) 298int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
299 unsigned long *mapped_blocks)
299{ 300{
300 struct super_block *sb = inode->i_sb; 301 struct super_block *sb = inode->i_sb;
301 struct msdos_sb_info *sbi = MSDOS_SB(sb); 302 struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
303 int cluster, offset; 304 int cluster, offset;
304 305
305 *phys = 0; 306 *phys = 0;
307 *mapped_blocks = 0;
306 if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) { 308 if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) {
307 if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) 309 if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) {
308 *phys = sector + sbi->dir_start; 310 *phys = sector + sbi->dir_start;
311 *mapped_blocks = 1;
312 }
309 return 0; 313 return 0;
310 } 314 }
311 last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) 315 last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1))
@@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
318 cluster = fat_bmap_cluster(inode, cluster); 322 cluster = fat_bmap_cluster(inode, cluster);
319 if (cluster < 0) 323 if (cluster < 0)
320 return cluster; 324 return cluster;
321 else if (cluster) 325 else if (cluster) {
322 *phys = fat_clus_to_blknr(sbi, cluster) + offset; 326 *phys = fat_clus_to_blknr(sbi, cluster) + offset;
327 *mapped_blocks = sbi->sec_per_clus - offset;
328 if (*mapped_blocks > last_block - sector)
329 *mapped_blocks = last_block - sector;
330 }
323 return 0; 331 return 0;
324} 332}
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ba824964b9bb..db0de5c621c7 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -45,8 +45,8 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
45 if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) 45 if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
46 return; 46 return;
47 47
48 bh = sb_getblk(sb, phys); 48 bh = sb_find_get_block(sb, phys);
49 if (bh && !buffer_uptodate(bh)) { 49 if (bh == NULL || !buffer_uptodate(bh)) {
50 for (sec = 0; sec < sbi->sec_per_clus; sec++) 50 for (sec = 0; sec < sbi->sec_per_clus; sec++)
51 sb_breadahead(sb, phys + sec); 51 sb_breadahead(sb, phys + sec);
52 } 52 }
@@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos,
68{ 68{
69 struct super_block *sb = dir->i_sb; 69 struct super_block *sb = dir->i_sb;
70 sector_t phys, iblock; 70 sector_t phys, iblock;
71 int offset; 71 unsigned long mapped_blocks;
72 int err; 72 int err, offset;
73 73
74next: 74next:
75 if (*bh) 75 if (*bh)
@@ -77,7 +77,7 @@ next:
77 77
78 *bh = NULL; 78 *bh = NULL;
79 iblock = *pos >> sb->s_blocksize_bits; 79 iblock = *pos >> sb->s_blocksize_bits;
80 err = fat_bmap(dir, iblock, &phys); 80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks);
81 if (err || !phys) 81 if (err || !phys)
82 return -1; /* beyond EOF or error */ 82 return -1; /* beyond EOF or error */
83 83
@@ -418,7 +418,7 @@ EODir:
418 return err; 418 return err;
419} 419}
420 420
421EXPORT_SYMBOL(fat_search_long); 421EXPORT_SYMBOL_GPL(fat_search_long);
422 422
423struct fat_ioctl_filldir_callback { 423struct fat_ioctl_filldir_callback {
424 struct dirent __user *dirent; 424 struct dirent __user *dirent;
@@ -729,13 +729,13 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp,
729 729
730 buf.dirent = d1; 730 buf.dirent = d1;
731 buf.result = 0; 731 buf.result = 0;
732 down(&inode->i_sem); 732 mutex_lock(&inode->i_mutex);
733 ret = -ENOENT; 733 ret = -ENOENT;
734 if (!IS_DEADDIR(inode)) { 734 if (!IS_DEADDIR(inode)) {
735 ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir, 735 ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir,
736 short_only, both); 736 short_only, both);
737 } 737 }
738 up(&inode->i_sem); 738 mutex_unlock(&inode->i_mutex);
739 if (ret >= 0) 739 if (ret >= 0)
740 ret = buf.result; 740 ret = buf.result;
741 return ret; 741 return ret;
@@ -780,7 +780,7 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
780 return -ENOENT; 780 return -ENOENT;
781} 781}
782 782
783EXPORT_SYMBOL(fat_get_dotdot_entry); 783EXPORT_SYMBOL_GPL(fat_get_dotdot_entry);
784 784
785/* See if directory is empty */ 785/* See if directory is empty */
786int fat_dir_empty(struct inode *dir) 786int fat_dir_empty(struct inode *dir)
@@ -803,7 +803,7 @@ int fat_dir_empty(struct inode *dir)
803 return result; 803 return result;
804} 804}
805 805
806EXPORT_SYMBOL(fat_dir_empty); 806EXPORT_SYMBOL_GPL(fat_dir_empty);
807 807
808/* 808/*
809 * fat_subdirs counts the number of sub-directories of dir. It can be run 809 * fat_subdirs counts the number of sub-directories of dir. It can be run
@@ -849,7 +849,7 @@ int fat_scan(struct inode *dir, const unsigned char *name,
849 return -ENOENT; 849 return -ENOENT;
850} 850}
851 851
852EXPORT_SYMBOL(fat_scan); 852EXPORT_SYMBOL_GPL(fat_scan);
853 853
854static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) 854static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
855{ 855{
@@ -936,7 +936,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
936 return 0; 936 return 0;
937} 937}
938 938
939EXPORT_SYMBOL(fat_remove_entries); 939EXPORT_SYMBOL_GPL(fat_remove_entries);
940 940
941static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, 941static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
942 struct buffer_head **bhs, int nr_bhs) 942 struct buffer_head **bhs, int nr_bhs)
@@ -1048,7 +1048,7 @@ error:
1048 return err; 1048 return err;
1049} 1049}
1050 1050
1051EXPORT_SYMBOL(fat_alloc_new_dir); 1051EXPORT_SYMBOL_GPL(fat_alloc_new_dir);
1052 1052
1053static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, 1053static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
1054 int *nr_cluster, struct msdos_dir_entry **de, 1054 int *nr_cluster, struct msdos_dir_entry **de,
@@ -1264,4 +1264,4 @@ error_remove:
1264 return err; 1264 return err;
1265} 1265}
1266 1266
1267EXPORT_SYMBOL(fat_add_entries); 1267EXPORT_SYMBOL_GPL(fat_add_entries);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 4164cd54c4d1..a1a9e0451217 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -476,6 +476,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
476 sbi->prev_free = entry; 476 sbi->prev_free = entry;
477 if (sbi->free_clusters != -1) 477 if (sbi->free_clusters != -1)
478 sbi->free_clusters--; 478 sbi->free_clusters--;
479 sb->s_dirt = 1;
479 480
480 cluster[idx_clus] = entry; 481 cluster[idx_clus] = entry;
481 idx_clus++; 482 idx_clus++;
@@ -496,6 +497,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
496 497
497 /* Couldn't allocate the free entries */ 498 /* Couldn't allocate the free entries */
498 sbi->free_clusters = 0; 499 sbi->free_clusters = 0;
500 sb->s_dirt = 1;
499 err = -ENOSPC; 501 err = -ENOSPC;
500 502
501out: 503out:
@@ -509,7 +511,6 @@ out:
509 } 511 }
510 for (i = 0; i < nr_bhs; i++) 512 for (i = 0; i < nr_bhs; i++)
511 brelse(bhs[i]); 513 brelse(bhs[i]);
512 fat_clusters_flush(sb);
513 514
514 if (err && idx_clus) 515 if (err && idx_clus)
515 fat_free_clusters(inode, cluster[0]); 516 fat_free_clusters(inode, cluster[0]);
@@ -542,8 +543,10 @@ int fat_free_clusters(struct inode *inode, int cluster)
542 } 543 }
543 544
544 ops->ent_put(&fatent, FAT_ENT_FREE); 545 ops->ent_put(&fatent, FAT_ENT_FREE);
545 if (sbi->free_clusters != -1) 546 if (sbi->free_clusters != -1) {
546 sbi->free_clusters++; 547 sbi->free_clusters++;
548 sb->s_dirt = 1;
549 }
547 550
548 if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { 551 if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) {
549 if (sb->s_flags & MS_SYNCHRONOUS) { 552 if (sb->s_flags & MS_SYNCHRONOUS) {
@@ -578,7 +581,7 @@ error:
578 return err; 581 return err;
579} 582}
580 583
581EXPORT_SYMBOL(fat_free_clusters); 584EXPORT_SYMBOL_GPL(fat_free_clusters);
582 585
583int fat_count_free_clusters(struct super_block *sb) 586int fat_count_free_clusters(struct super_block *sb)
584{ 587{
@@ -605,6 +608,7 @@ int fat_count_free_clusters(struct super_block *sb)
605 } while (fat_ent_next(sbi, &fatent)); 608 } while (fat_ent_next(sbi, &fatent));
606 } 609 }
607 sbi->free_clusters = free; 610 sbi->free_clusters = free;
611 sb->s_dirt = 1;
608 fatent_brelse(&fatent); 612 fatent_brelse(&fatent);
609out: 613out:
610 unlock_fat(sbi); 614 unlock_fat(sbi);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7134403d5be2..e99c5a73b39e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -6,11 +6,13 @@
6 * regular file handling primitives for fat-based filesystems 6 * regular file handling primitives for fat-based filesystems
7 */ 7 */
8 8
9#include <linux/capability.h>
9#include <linux/module.h> 10#include <linux/module.h>
10#include <linux/time.h> 11#include <linux/time.h>
11#include <linux/msdos_fs.h> 12#include <linux/msdos_fs.h>
12#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
13#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/writeback.h>
14 16
15int fat_generic_ioctl(struct inode *inode, struct file *filp, 17int fat_generic_ioctl(struct inode *inode, struct file *filp,
16 unsigned int cmd, unsigned long arg) 18 unsigned int cmd, unsigned long arg)
@@ -40,7 +42,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
40 if (err) 42 if (err)
41 return err; 43 return err;
42 44
43 down(&inode->i_sem); 45 mutex_lock(&inode->i_mutex);
44 46
45 if (IS_RDONLY(inode)) { 47 if (IS_RDONLY(inode)) {
46 err = -EROFS; 48 err = -EROFS;
@@ -102,7 +104,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
102 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; 104 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
103 mark_inode_dirty(inode); 105 mark_inode_dirty(inode);
104 up: 106 up:
105 up(&inode->i_sem); 107 mutex_unlock(&inode->i_mutex);
106 return err; 108 return err;
107 } 109 }
108 default: 110 default:
@@ -124,6 +126,24 @@ struct file_operations fat_file_operations = {
124 .sendfile = generic_file_sendfile, 126 .sendfile = generic_file_sendfile,
125}; 127};
126 128
129static int fat_cont_expand(struct inode *inode, loff_t size)
130{
131 struct address_space *mapping = inode->i_mapping;
132 loff_t start = inode->i_size, count = size - inode->i_size;
133 int err;
134
135 err = generic_cont_expand_simple(inode, size);
136 if (err)
137 goto out;
138
139 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
140 mark_inode_dirty(inode);
141 if (IS_SYNC(inode))
142 err = sync_page_range_nolock(inode, mapping, start, count);
143out:
144 return err;
145}
146
127int fat_notify_change(struct dentry *dentry, struct iattr *attr) 147int fat_notify_change(struct dentry *dentry, struct iattr *attr)
128{ 148{
129 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 149 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -132,11 +152,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr)
132 152
133 lock_kernel(); 153 lock_kernel();
134 154
135 /* FAT cannot truncate to a longer file */ 155 /*
156 * Expand the file. Since inode_setattr() updates ->i_size
157 * before calling the ->truncate(), but FAT needs to fill the
158 * hole before it.
159 */
136 if (attr->ia_valid & ATTR_SIZE) { 160 if (attr->ia_valid & ATTR_SIZE) {
137 if (attr->ia_size > inode->i_size) { 161 if (attr->ia_size > inode->i_size) {
138 error = -EPERM; 162 error = fat_cont_expand(inode, attr->ia_size);
139 goto out; 163 if (error || attr->ia_valid == ATTR_SIZE)
164 goto out;
165 attr->ia_valid &= ~ATTR_SIZE;
140 } 166 }
141 } 167 }
142 168
@@ -173,7 +199,7 @@ out:
173 return error; 199 return error;
174} 200}
175 201
176EXPORT_SYMBOL(fat_notify_change); 202EXPORT_SYMBOL_GPL(fat_notify_change);
177 203
178/* Free all clusters after the skip'th cluster. */ 204/* Free all clusters after the skip'th cluster. */
179static int fat_free(struct inode *inode, int skip) 205static int fat_free(struct inode *inode, int skip)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a0f9b9fe1307..e7f4aa7fc686 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -18,10 +18,12 @@
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/msdos_fs.h> 19#include <linux/msdos_fs.h>
20#include <linux/pagemap.h> 20#include <linux/pagemap.h>
21#include <linux/mpage.h>
21#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
22#include <linux/mount.h> 23#include <linux/mount.h>
23#include <linux/vfs.h> 24#include <linux/vfs.h>
24#include <linux/parser.h> 25#include <linux/parser.h>
26#include <linux/uio.h>
25#include <asm/unaligned.h> 27#include <asm/unaligned.h>
26 28
27#ifndef CONFIG_FAT_DEFAULT_IOCHARSET 29#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -48,51 +50,97 @@ static int fat_add_cluster(struct inode *inode)
48 return err; 50 return err;
49} 51}
50 52
51static int fat_get_block(struct inode *inode, sector_t iblock, 53static int __fat_get_blocks(struct inode *inode, sector_t iblock,
52 struct buffer_head *bh_result, int create) 54 unsigned long *max_blocks,
55 struct buffer_head *bh_result, int create)
53{ 56{
54 struct super_block *sb = inode->i_sb; 57 struct super_block *sb = inode->i_sb;
58 struct msdos_sb_info *sbi = MSDOS_SB(sb);
55 sector_t phys; 59 sector_t phys;
56 int err; 60 unsigned long mapped_blocks;
61 int err, offset;
57 62
58 err = fat_bmap(inode, iblock, &phys); 63 err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
59 if (err) 64 if (err)
60 return err; 65 return err;
61 if (phys) { 66 if (phys) {
62 map_bh(bh_result, sb, phys); 67 map_bh(bh_result, sb, phys);
68 *max_blocks = min(mapped_blocks, *max_blocks);
63 return 0; 69 return 0;
64 } 70 }
65 if (!create) 71 if (!create)
66 return 0; 72 return 0;
73
67 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { 74 if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
68 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", 75 fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
69 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); 76 MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
70 return -EIO; 77 return -EIO;
71 } 78 }
72 if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) { 79
80 offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
81 if (!offset) {
82 /* TODO: multiple cluster allocation would be desirable. */
73 err = fat_add_cluster(inode); 83 err = fat_add_cluster(inode);
74 if (err) 84 if (err)
75 return err; 85 return err;
76 } 86 }
77 MSDOS_I(inode)->mmu_private += sb->s_blocksize; 87 /* available blocks on this cluster */
78 err = fat_bmap(inode, iblock, &phys); 88 mapped_blocks = sbi->sec_per_clus - offset;
89
90 *max_blocks = min(mapped_blocks, *max_blocks);
91 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
92
93 err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
79 if (err) 94 if (err)
80 return err; 95 return err;
81 if (!phys) 96 BUG_ON(!phys);
82 BUG(); 97 BUG_ON(*max_blocks != mapped_blocks);
83 set_buffer_new(bh_result); 98 set_buffer_new(bh_result);
84 map_bh(bh_result, sb, phys); 99 map_bh(bh_result, sb, phys);
85 return 0; 100 return 0;
86} 101}
87 102
103static int fat_get_blocks(struct inode *inode, sector_t iblock,
104 unsigned long max_blocks,
105 struct buffer_head *bh_result, int create)
106{
107 struct super_block *sb = inode->i_sb;
108 int err;
109
110 err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
111 if (err)
112 return err;
113 bh_result->b_size = max_blocks << sb->s_blocksize_bits;
114 return 0;
115}
116
117static int fat_get_block(struct inode *inode, sector_t iblock,
118 struct buffer_head *bh_result, int create)
119{
120 unsigned long max_blocks = 1;
121 return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
122}
123
88static int fat_writepage(struct page *page, struct writeback_control *wbc) 124static int fat_writepage(struct page *page, struct writeback_control *wbc)
89{ 125{
90 return block_write_full_page(page, fat_get_block, wbc); 126 return block_write_full_page(page, fat_get_block, wbc);
91} 127}
92 128
129static int fat_writepages(struct address_space *mapping,
130 struct writeback_control *wbc)
131{
132 return mpage_writepages(mapping, wbc, fat_get_block);
133}
134
93static int fat_readpage(struct file *file, struct page *page) 135static int fat_readpage(struct file *file, struct page *page)
94{ 136{
95 return block_read_full_page(page, fat_get_block); 137 return mpage_readpage(page, fat_get_block);
138}
139
140static int fat_readpages(struct file *file, struct address_space *mapping,
141 struct list_head *pages, unsigned nr_pages)
142{
143 return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
96} 144}
97 145
98static int fat_prepare_write(struct file *file, struct page *page, 146static int fat_prepare_write(struct file *file, struct page *page,
@@ -115,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page,
115 return err; 163 return err;
116} 164}
117 165
166static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
167 const struct iovec *iov,
168 loff_t offset, unsigned long nr_segs)
169{
170 struct file *file = iocb->ki_filp;
171 struct inode *inode = file->f_mapping->host;
172
173 if (rw == WRITE) {
174 /*
175 * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
176 * so we need to update the ->mmu_private to block boundary.
177 *
178 * But we must fill the remaining area or hole by nul for
179 * updating ->mmu_private.
180 */
181 loff_t size = offset + iov_length(iov, nr_segs);
182 if (MSDOS_I(inode)->mmu_private < size)
183 return -EINVAL;
184 }
185
186 /*
187 * FAT need to use the DIO_LOCKING for avoiding the race
188 * condition of fat_get_block() and ->truncate().
189 */
190 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
191 offset, nr_segs, fat_get_blocks, NULL);
192}
193
118static sector_t _fat_bmap(struct address_space *mapping, sector_t block) 194static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
119{ 195{
120 return generic_block_bmap(mapping, block, fat_get_block); 196 return generic_block_bmap(mapping, block, fat_get_block);
@@ -122,10 +198,13 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
122 198
123static struct address_space_operations fat_aops = { 199static struct address_space_operations fat_aops = {
124 .readpage = fat_readpage, 200 .readpage = fat_readpage,
201 .readpages = fat_readpages,
125 .writepage = fat_writepage, 202 .writepage = fat_writepage,
203 .writepages = fat_writepages,
126 .sync_page = block_sync_page, 204 .sync_page = block_sync_page,
127 .prepare_write = fat_prepare_write, 205 .prepare_write = fat_prepare_write,
128 .commit_write = fat_commit_write, 206 .commit_write = fat_commit_write,
207 .direct_IO = fat_direct_IO,
129 .bmap = _fat_bmap 208 .bmap = _fat_bmap
130}; 209};
131 210
@@ -182,7 +261,7 @@ void fat_attach(struct inode *inode, loff_t i_pos)
182 spin_unlock(&sbi->inode_hash_lock); 261 spin_unlock(&sbi->inode_hash_lock);
183} 262}
184 263
185EXPORT_SYMBOL(fat_attach); 264EXPORT_SYMBOL_GPL(fat_attach);
186 265
187void fat_detach(struct inode *inode) 266void fat_detach(struct inode *inode)
188{ 267{
@@ -193,7 +272,7 @@ void fat_detach(struct inode *inode)
193 spin_unlock(&sbi->inode_hash_lock); 272 spin_unlock(&sbi->inode_hash_lock);
194} 273}
195 274
196EXPORT_SYMBOL(fat_detach); 275EXPORT_SYMBOL_GPL(fat_detach);
197 276
198struct inode *fat_iget(struct super_block *sb, loff_t i_pos) 277struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
199{ 278{
@@ -347,7 +426,7 @@ out:
347 return inode; 426 return inode;
348} 427}
349 428
350EXPORT_SYMBOL(fat_build_inode); 429EXPORT_SYMBOL_GPL(fat_build_inode);
351 430
352static void fat_delete_inode(struct inode *inode) 431static void fat_delete_inode(struct inode *inode)
353{ 432{
@@ -374,12 +453,17 @@ static void fat_clear_inode(struct inode *inode)
374 unlock_kernel(); 453 unlock_kernel();
375} 454}
376 455
377static void fat_put_super(struct super_block *sb) 456static void fat_write_super(struct super_block *sb)
378{ 457{
379 struct msdos_sb_info *sbi = MSDOS_SB(sb); 458 sb->s_dirt = 0;
380 459
381 if (!(sb->s_flags & MS_RDONLY)) 460 if (!(sb->s_flags & MS_RDONLY))
382 fat_clusters_flush(sb); 461 fat_clusters_flush(sb);
462}
463
464static void fat_put_super(struct super_block *sb)
465{
466 struct msdos_sb_info *sbi = MSDOS_SB(sb);
383 467
384 if (sbi->nls_disk) { 468 if (sbi->nls_disk) {
385 unload_nls(sbi->nls_disk); 469 unload_nls(sbi->nls_disk);
@@ -537,7 +621,7 @@ int fat_sync_inode(struct inode *inode)
537 return fat_write_inode(inode, 1); 621 return fat_write_inode(inode, 1);
538} 622}
539 623
540EXPORT_SYMBOL(fat_sync_inode); 624EXPORT_SYMBOL_GPL(fat_sync_inode);
541 625
542static int fat_show_options(struct seq_file *m, struct vfsmount *mnt); 626static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
543static struct super_operations fat_sops = { 627static struct super_operations fat_sops = {
@@ -546,6 +630,7 @@ static struct super_operations fat_sops = {
546 .write_inode = fat_write_inode, 630 .write_inode = fat_write_inode,
547 .delete_inode = fat_delete_inode, 631 .delete_inode = fat_delete_inode,
548 .put_super = fat_put_super, 632 .put_super = fat_put_super,
633 .write_super = fat_write_super,
549 .statfs = fat_statfs, 634 .statfs = fat_statfs,
550 .clear_inode = fat_clear_inode, 635 .clear_inode = fat_clear_inode,
551 .remount_fs = fat_remount, 636 .remount_fs = fat_remount,
@@ -1347,7 +1432,7 @@ out_fail:
1347 return error; 1432 return error;
1348} 1433}
1349 1434
1350EXPORT_SYMBOL(fat_fill_super); 1435EXPORT_SYMBOL_GPL(fat_fill_super);
1351 1436
1352int __init fat_cache_init(void); 1437int __init fat_cache_init(void);
1353void fat_cache_destroy(void); 1438void fat_cache_destroy(void);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 2a0df2122f5d..32fb0a3f1da4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -33,7 +33,7 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)
33 } 33 }
34} 34}
35 35
36EXPORT_SYMBOL(fat_fs_panic); 36EXPORT_SYMBOL_GPL(fat_fs_panic);
37 37
38/* Flushes the number of free clusters on FAT32 */ 38/* Flushes the number of free clusters on FAT32 */
39/* XXX: Need to write one per FSINFO block. Currently only writes 1 */ 39/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
@@ -67,8 +67,6 @@ void fat_clusters_flush(struct super_block *sb)
67 if (sbi->prev_free != -1) 67 if (sbi->prev_free != -1)
68 fsinfo->next_cluster = cpu_to_le32(sbi->prev_free); 68 fsinfo->next_cluster = cpu_to_le32(sbi->prev_free);
69 mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
70 if (sb->s_flags & MS_SYNCHRONOUS)
71 sync_dirty_buffer(bh);
72 } 70 }
73 brelse(bh); 71 brelse(bh);
74} 72}
@@ -194,7 +192,7 @@ void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
194 *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); 192 *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9));
195} 193}
196 194
197EXPORT_SYMBOL(fat_date_unix2dos); 195EXPORT_SYMBOL_GPL(fat_date_unix2dos);
198 196
199int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) 197int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
200{ 198{
@@ -222,4 +220,4 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
222 return err; 220 return err;
223} 221}
224 222
225EXPORT_SYMBOL(fat_sync_bhs); 223EXPORT_SYMBOL_GPL(fat_sync_bhs);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 863b46e0d78a..5f96786d1c73 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/capability.h>
12#include <linux/dnotify.h> 13#include <linux/dnotify.h>
13#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
@@ -35,7 +36,7 @@ void fastcall set_close_on_exec(unsigned int fd, int flag)
35 spin_unlock(&files->file_lock); 36 spin_unlock(&files->file_lock);
36} 37}
37 38
38static inline int get_close_on_exec(unsigned int fd) 39static int get_close_on_exec(unsigned int fd)
39{ 40{
40 struct files_struct *files = current->files; 41 struct files_struct *files = current->files;
41 struct fdtable *fdt; 42 struct fdtable *fdt;
@@ -457,11 +458,11 @@ static void send_sigio_to_task(struct task_struct *p,
457 else 458 else
458 si.si_band = band_table[reason - POLL_IN]; 459 si.si_band = band_table[reason - POLL_IN];
459 si.si_fd = fd; 460 si.si_fd = fd;
460 if (!send_group_sig_info(fown->signum, &si, p)) 461 if (!group_send_sig_info(fown->signum, &si, p))
461 break; 462 break;
462 /* fall-through: fall back on the old plain SIGIO signal */ 463 /* fall-through: fall back on the old plain SIGIO signal */
463 case 0: 464 case 0:
464 send_group_sig_info(SIGIO, SEND_SIG_PRIV, p); 465 group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
465 } 466 }
466} 467}
467 468
@@ -495,7 +496,7 @@ static void send_sigurg_to_task(struct task_struct *p,
495 struct fown_struct *fown) 496 struct fown_struct *fown)
496{ 497{
497 if (sigio_perm(p, fown, SIGURG)) 498 if (sigio_perm(p, fown, SIGURG))
498 send_group_sig_info(SIGURG, SEND_SIG_PRIV, p); 499 group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
499} 500}
500 501
501int send_sigurg(struct fown_struct *fown) 502int send_sigurg(struct fown_struct *fown)
diff --git a/fs/fifo.c b/fs/fifo.c
index 5455916241f0..923371b753ab 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -35,7 +35,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
35 int ret; 35 int ret;
36 36
37 ret = -ERESTARTSYS; 37 ret = -ERESTARTSYS;
38 if (down_interruptible(PIPE_SEM(*inode))) 38 if (mutex_lock_interruptible(PIPE_MUTEX(*inode)))
39 goto err_nolock_nocleanup; 39 goto err_nolock_nocleanup;
40 40
41 if (!inode->i_pipe) { 41 if (!inode->i_pipe) {
@@ -119,7 +119,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
119 } 119 }
120 120
121 /* Ok! */ 121 /* Ok! */
122 up(PIPE_SEM(*inode)); 122 mutex_unlock(PIPE_MUTEX(*inode));
123 return 0; 123 return 0;
124 124
125err_rd: 125err_rd:
@@ -139,7 +139,7 @@ err:
139 free_pipe_info(inode); 139 free_pipe_info(inode);
140 140
141err_nocleanup: 141err_nocleanup:
142 up(PIPE_SEM(*inode)); 142 mutex_unlock(PIPE_MUTEX(*inode));
143 143
144err_nolock_nocleanup: 144err_nolock_nocleanup:
145 return ret; 145 return ret;
diff --git a/fs/file_table.c b/fs/file_table.c
index c3a5e2fd663b..768b58167543 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -16,6 +16,7 @@
16#include <linux/eventpoll.h> 16#include <linux/eventpoll.h>
17#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/capability.h>
19#include <linux/cdev.h> 20#include <linux/cdev.h>
20#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
21 22
@@ -117,7 +118,7 @@ EXPORT_SYMBOL(get_empty_filp);
117 118
118void fastcall fput(struct file *file) 119void fastcall fput(struct file *file)
119{ 120{
120 if (rcuref_dec_and_test(&file->f_count)) 121 if (atomic_dec_and_test(&file->f_count))
121 __fput(file); 122 __fput(file);
122} 123}
123 124
@@ -166,7 +167,7 @@ struct file fastcall *fget(unsigned int fd)
166 rcu_read_lock(); 167 rcu_read_lock();
167 file = fcheck_files(files, fd); 168 file = fcheck_files(files, fd);
168 if (file) { 169 if (file) {
169 if (!rcuref_inc_lf(&file->f_count)) { 170 if (!atomic_inc_not_zero(&file->f_count)) {
170 /* File object ref couldn't be taken */ 171 /* File object ref couldn't be taken */
171 rcu_read_unlock(); 172 rcu_read_unlock();
172 return NULL; 173 return NULL;
@@ -198,7 +199,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
198 rcu_read_lock(); 199 rcu_read_lock();
199 file = fcheck_files(files, fd); 200 file = fcheck_files(files, fd);
200 if (file) { 201 if (file) {
201 if (rcuref_inc_lf(&file->f_count)) 202 if (atomic_inc_not_zero(&file->f_count))
202 *fput_needed = 1; 203 *fput_needed = 1;
203 else 204 else
204 /* Didn't get the reference, someone's freed */ 205 /* Didn't get the reference, someone's freed */
@@ -213,7 +214,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
213 214
214void put_filp(struct file *file) 215void put_filp(struct file *file)
215{ 216{
216 if (rcuref_dec_and_test(&file->f_count)) { 217 if (atomic_dec_and_test(&file->f_count)) {
217 security_file_free(file); 218 security_file_free(file);
218 file_kill(file); 219 file_kill(file);
219 file_free(file); 220 file_free(file);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index d0401dc68d41..6f5df1700e95 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -99,8 +99,8 @@ static int
99vxfs_immed_readpage(struct file *fp, struct page *pp) 99vxfs_immed_readpage(struct file *fp, struct page *pp)
100{ 100{
101 struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host); 101 struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host);
102 u_int64_t offset = pp->index << PAGE_CACHE_SHIFT; 102 u_int64_t offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT;
103 caddr_t kaddr; 103 caddr_t kaddr;
104 104
105 kaddr = kmap(pp); 105 kaddr = kmap(pp);
106 memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE); 106 memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e08ab4702d97..4526da8907c6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -21,18 +21,18 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21 21
22static kmem_cache_t *fuse_req_cachep; 22static kmem_cache_t *fuse_req_cachep;
23 23
24static inline struct fuse_conn *fuse_get_conn(struct file *file) 24static struct fuse_conn *fuse_get_conn(struct file *file)
25{ 25{
26 struct fuse_conn *fc; 26 struct fuse_conn *fc;
27 spin_lock(&fuse_lock); 27 spin_lock(&fuse_lock);
28 fc = file->private_data; 28 fc = file->private_data;
29 if (fc && !fc->mounted) 29 if (fc && !fc->connected)
30 fc = NULL; 30 fc = NULL;
31 spin_unlock(&fuse_lock); 31 spin_unlock(&fuse_lock);
32 return fc; 32 return fc;
33} 33}
34 34
35static inline void fuse_request_init(struct fuse_req *req) 35static void fuse_request_init(struct fuse_req *req)
36{ 36{
37 memset(req, 0, sizeof(*req)); 37 memset(req, 0, sizeof(*req));
38 INIT_LIST_HEAD(&req->list); 38 INIT_LIST_HEAD(&req->list);
@@ -53,7 +53,7 @@ void fuse_request_free(struct fuse_req *req)
53 kmem_cache_free(fuse_req_cachep, req); 53 kmem_cache_free(fuse_req_cachep, req);
54} 54}
55 55
56static inline void block_sigs(sigset_t *oldset) 56static void block_sigs(sigset_t *oldset)
57{ 57{
58 sigset_t mask; 58 sigset_t mask;
59 59
@@ -61,7 +61,7 @@ static inline void block_sigs(sigset_t *oldset)
61 sigprocmask(SIG_BLOCK, &mask, oldset); 61 sigprocmask(SIG_BLOCK, &mask, oldset);
62} 62}
63 63
64static inline void restore_sigs(sigset_t *oldset) 64static void restore_sigs(sigset_t *oldset)
65{ 65{
66 sigprocmask(SIG_SETMASK, oldset, NULL); 66 sigprocmask(SIG_SETMASK, oldset, NULL);
67} 67}
@@ -109,18 +109,24 @@ struct fuse_req *fuse_get_request(struct fuse_conn *fc)
109 int intr; 109 int intr;
110 sigset_t oldset; 110 sigset_t oldset;
111 111
112 atomic_inc(&fc->num_waiting);
112 block_sigs(&oldset); 113 block_sigs(&oldset);
113 intr = down_interruptible(&fc->outstanding_sem); 114 intr = down_interruptible(&fc->outstanding_sem);
114 restore_sigs(&oldset); 115 restore_sigs(&oldset);
115 return intr ? NULL : do_get_request(fc); 116 if (intr) {
117 atomic_dec(&fc->num_waiting);
118 return NULL;
119 }
120 return do_get_request(fc);
116} 121}
117 122
118static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) 123static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
119{ 124{
120 spin_lock(&fuse_lock); 125 spin_lock(&fuse_lock);
121 if (req->preallocated) 126 if (req->preallocated) {
127 atomic_dec(&fc->num_waiting);
122 list_add(&req->list, &fc->unused_list); 128 list_add(&req->list, &fc->unused_list);
123 else 129 } else
124 fuse_request_free(req); 130 fuse_request_free(req);
125 131
126 /* If we are in debt decrease that first */ 132 /* If we are in debt decrease that first */
@@ -148,42 +154,23 @@ void fuse_release_background(struct fuse_req *req)
148 spin_unlock(&fuse_lock); 154 spin_unlock(&fuse_lock);
149} 155}
150 156
151static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
152{
153 int i;
154 struct fuse_init_out *arg = &req->misc.init_out;
155
156 if (arg->major != FUSE_KERNEL_VERSION)
157 fc->conn_error = 1;
158 else {
159 fc->minor = arg->minor;
160 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
161 }
162
163 /* After INIT reply is received other requests can go
164 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
165 up()s on outstanding_sem. The last up() is done in
166 fuse_putback_request() */
167 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
168 up(&fc->outstanding_sem);
169}
170
171/* 157/*
172 * This function is called when a request is finished. Either a reply 158 * This function is called when a request is finished. Either a reply
173 * has arrived or it was interrupted (and not yet sent) or some error 159 * has arrived or it was interrupted (and not yet sent) or some error
174 * occurred during communication with userspace, or the device file was 160 * occurred during communication with userspace, or the device file
175 * closed. It decreases the reference count for the request. In case 161 * was closed. In case of a background request the reference to the
176 * of a background request the reference to the stored objects are 162 * stored objects are released. The requester thread is woken up (if
177 * released. The requester thread is woken up (if still waiting), and 163 * still waiting), the 'end' callback is called if given, else the
178 * finally the request is either freed or put on the unused_list 164 * reference to the request is released
179 * 165 *
180 * Called with fuse_lock, unlocks it 166 * Called with fuse_lock, unlocks it
181 */ 167 */
182static void request_end(struct fuse_conn *fc, struct fuse_req *req) 168static void request_end(struct fuse_conn *fc, struct fuse_req *req)
183{ 169{
184 int putback; 170 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
185 req->finished = 1; 171 req->end = NULL;
186 putback = atomic_dec_and_test(&req->count); 172 list_del(&req->list);
173 req->state = FUSE_REQ_FINISHED;
187 spin_unlock(&fuse_lock); 174 spin_unlock(&fuse_lock);
188 if (req->background) { 175 if (req->background) {
189 down_read(&fc->sbput_sem); 176 down_read(&fc->sbput_sem);
@@ -192,18 +179,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
192 up_read(&fc->sbput_sem); 179 up_read(&fc->sbput_sem);
193 } 180 }
194 wake_up(&req->waitq); 181 wake_up(&req->waitq);
195 if (req->in.h.opcode == FUSE_INIT) 182 if (end)
196 process_init_reply(fc, req); 183 end(fc, req);
197 else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { 184 else
198 /* Special case for failed iget in CREATE */ 185 fuse_put_request(fc, req);
199 u64 nodeid = req->in.h.nodeid;
200 __fuse_get_request(req);
201 fuse_reset_request(req);
202 fuse_send_forget(fc, req, nodeid, 1);
203 putback = 0;
204 }
205 if (putback)
206 fuse_putback_request(fc, req);
207} 186}
208 187
209/* 188/*
@@ -254,14 +233,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
254 233
255 spin_unlock(&fuse_lock); 234 spin_unlock(&fuse_lock);
256 block_sigs(&oldset); 235 block_sigs(&oldset);
257 wait_event_interruptible(req->waitq, req->finished); 236 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
258 restore_sigs(&oldset); 237 restore_sigs(&oldset);
259 spin_lock(&fuse_lock); 238 spin_lock(&fuse_lock);
260 if (req->finished) 239 if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
261 return; 240 return;
262 241
263 req->out.h.error = -EINTR; 242 if (!req->interrupted) {
264 req->interrupted = 1; 243 req->out.h.error = -EINTR;
244 req->interrupted = 1;
245 }
265 if (req->locked) { 246 if (req->locked) {
266 /* This is uninterruptible sleep, because data is 247 /* This is uninterruptible sleep, because data is
267 being copied to/from the buffers of req. During 248 being copied to/from the buffers of req. During
@@ -272,10 +253,10 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
272 wait_event(req->waitq, !req->locked); 253 wait_event(req->waitq, !req->locked);
273 spin_lock(&fuse_lock); 254 spin_lock(&fuse_lock);
274 } 255 }
275 if (!req->sent && !list_empty(&req->list)) { 256 if (req->state == FUSE_REQ_PENDING) {
276 list_del(&req->list); 257 list_del(&req->list);
277 __fuse_put_request(req); 258 __fuse_put_request(req);
278 } else if (!req->finished && req->sent) 259 } else if (req->state == FUSE_REQ_SENT)
279 background_request(fc, req); 260 background_request(fc, req);
280} 261}
281 262
@@ -310,6 +291,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
310 fc->outstanding_debt++; 291 fc->outstanding_debt++;
311 } 292 }
312 list_add_tail(&req->list, &fc->pending); 293 list_add_tail(&req->list, &fc->pending);
294 req->state = FUSE_REQ_PENDING;
313 wake_up(&fc->waitq); 295 wake_up(&fc->waitq);
314} 296}
315 297
@@ -362,34 +344,12 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
362 request_send_nowait(fc, req); 344 request_send_nowait(fc, req);
363} 345}
364 346
365void fuse_send_init(struct fuse_conn *fc)
366{
367 /* This is called from fuse_read_super() so there's guaranteed
368 to be a request available */
369 struct fuse_req *req = do_get_request(fc);
370 struct fuse_init_in *arg = &req->misc.init_in;
371 arg->major = FUSE_KERNEL_VERSION;
372 arg->minor = FUSE_KERNEL_MINOR_VERSION;
373 req->in.h.opcode = FUSE_INIT;
374 req->in.numargs = 1;
375 req->in.args[0].size = sizeof(*arg);
376 req->in.args[0].value = arg;
377 req->out.numargs = 1;
378 /* Variable length arguement used for backward compatibility
379 with interface version < 7.5. Rest of init_out is zeroed
380 by do_get_request(), so a short reply is not a problem */
381 req->out.argvar = 1;
382 req->out.args[0].size = sizeof(struct fuse_init_out);
383 req->out.args[0].value = &req->misc.init_out;
384 request_send_background(fc, req);
385}
386
387/* 347/*
388 * Lock the request. Up to the next unlock_request() there mustn't be 348 * Lock the request. Up to the next unlock_request() there mustn't be
389 * anything that could cause a page-fault. If the request was already 349 * anything that could cause a page-fault. If the request was already
390 * interrupted bail out. 350 * interrupted bail out.
391 */ 351 */
392static inline int lock_request(struct fuse_req *req) 352static int lock_request(struct fuse_req *req)
393{ 353{
394 int err = 0; 354 int err = 0;
395 if (req) { 355 if (req) {
@@ -408,7 +368,7 @@ static inline int lock_request(struct fuse_req *req)
408 * requester thread is currently waiting for it to be unlocked, so 368 * requester thread is currently waiting for it to be unlocked, so
409 * wake it up. 369 * wake it up.
410 */ 370 */
411static inline void unlock_request(struct fuse_req *req) 371static void unlock_request(struct fuse_req *req)
412{ 372{
413 if (req) { 373 if (req) {
414 spin_lock(&fuse_lock); 374 spin_lock(&fuse_lock);
@@ -444,7 +404,7 @@ static void fuse_copy_init(struct fuse_copy_state *cs, int write,
444} 404}
445 405
446/* Unmap and put previous page of userspace buffer */ 406/* Unmap and put previous page of userspace buffer */
447static inline void fuse_copy_finish(struct fuse_copy_state *cs) 407static void fuse_copy_finish(struct fuse_copy_state *cs)
448{ 408{
449 if (cs->mapaddr) { 409 if (cs->mapaddr) {
450 kunmap_atomic(cs->mapaddr, KM_USER0); 410 kunmap_atomic(cs->mapaddr, KM_USER0);
@@ -493,8 +453,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
493} 453}
494 454
495/* Do as much copy to/from userspace buffer as we can */ 455/* Do as much copy to/from userspace buffer as we can */
496static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val, 456static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
497 unsigned *size)
498{ 457{
499 unsigned ncpy = min(*size, cs->len); 458 unsigned ncpy = min(*size, cs->len);
500 if (val) { 459 if (val) {
@@ -514,8 +473,8 @@ static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
514 * Copy a page in the request to/from the userspace buffer. Must be 473 * Copy a page in the request to/from the userspace buffer. Must be
515 * done atomically 474 * done atomically
516 */ 475 */
517static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, 476static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
518 unsigned offset, unsigned count, int zeroing) 477 unsigned offset, unsigned count, int zeroing)
519{ 478{
520 if (page && zeroing && count < PAGE_SIZE) { 479 if (page && zeroing && count < PAGE_SIZE) {
521 void *mapaddr = kmap_atomic(page, KM_USER1); 480 void *mapaddr = kmap_atomic(page, KM_USER1);
@@ -597,7 +556,7 @@ static void request_wait(struct fuse_conn *fc)
597 DECLARE_WAITQUEUE(wait, current); 556 DECLARE_WAITQUEUE(wait, current);
598 557
599 add_wait_queue_exclusive(&fc->waitq, &wait); 558 add_wait_queue_exclusive(&fc->waitq, &wait);
600 while (fc->mounted && list_empty(&fc->pending)) { 559 while (fc->connected && list_empty(&fc->pending)) {
601 set_current_state(TASK_INTERRUPTIBLE); 560 set_current_state(TASK_INTERRUPTIBLE);
602 if (signal_pending(current)) 561 if (signal_pending(current))
603 break; 562 break;
@@ -637,14 +596,15 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
637 goto err_unlock; 596 goto err_unlock;
638 request_wait(fc); 597 request_wait(fc);
639 err = -ENODEV; 598 err = -ENODEV;
640 if (!fc->mounted) 599 if (!fc->connected)
641 goto err_unlock; 600 goto err_unlock;
642 err = -ERESTARTSYS; 601 err = -ERESTARTSYS;
643 if (list_empty(&fc->pending)) 602 if (list_empty(&fc->pending))
644 goto err_unlock; 603 goto err_unlock;
645 604
646 req = list_entry(fc->pending.next, struct fuse_req, list); 605 req = list_entry(fc->pending.next, struct fuse_req, list);
647 list_del_init(&req->list); 606 req->state = FUSE_REQ_READING;
607 list_move(&req->list, &fc->io);
648 608
649 in = &req->in; 609 in = &req->in;
650 reqsize = in->h.len; 610 reqsize = in->h.len;
@@ -677,8 +637,8 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
677 if (!req->isreply) 637 if (!req->isreply)
678 request_end(fc, req); 638 request_end(fc, req);
679 else { 639 else {
680 req->sent = 1; 640 req->state = FUSE_REQ_SENT;
681 list_add_tail(&req->list, &fc->processing); 641 list_move_tail(&req->list, &fc->processing);
682 spin_unlock(&fuse_lock); 642 spin_unlock(&fuse_lock);
683 } 643 }
684 return reqsize; 644 return reqsize;
@@ -766,17 +726,23 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
766 goto err_finish; 726 goto err_finish;
767 727
768 spin_lock(&fuse_lock); 728 spin_lock(&fuse_lock);
729 err = -ENOENT;
730 if (!fc->connected)
731 goto err_unlock;
732
769 req = request_find(fc, oh.unique); 733 req = request_find(fc, oh.unique);
770 err = -EINVAL; 734 err = -EINVAL;
771 if (!req) 735 if (!req)
772 goto err_unlock; 736 goto err_unlock;
773 737
774 list_del_init(&req->list);
775 if (req->interrupted) { 738 if (req->interrupted) {
776 request_end(fc, req); 739 spin_unlock(&fuse_lock);
777 fuse_copy_finish(&cs); 740 fuse_copy_finish(&cs);
741 spin_lock(&fuse_lock);
742 request_end(fc, req);
778 return -ENOENT; 743 return -ENOENT;
779 } 744 }
745 list_move(&req->list, &fc->io);
780 req->out.h = oh; 746 req->out.h = oh;
781 req->locked = 1; 747 req->locked = 1;
782 cs.req = req; 748 cs.req = req;
@@ -830,19 +796,90 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
830 return mask; 796 return mask;
831} 797}
832 798
833/* Abort all requests on the given list (pending or processing) */ 799/*
800 * Abort all requests on the given list (pending or processing)
801 *
802 * This function releases and reacquires fuse_lock
803 */
834static void end_requests(struct fuse_conn *fc, struct list_head *head) 804static void end_requests(struct fuse_conn *fc, struct list_head *head)
835{ 805{
836 while (!list_empty(head)) { 806 while (!list_empty(head)) {
837 struct fuse_req *req; 807 struct fuse_req *req;
838 req = list_entry(head->next, struct fuse_req, list); 808 req = list_entry(head->next, struct fuse_req, list);
839 list_del_init(&req->list);
840 req->out.h.error = -ECONNABORTED; 809 req->out.h.error = -ECONNABORTED;
841 request_end(fc, req); 810 request_end(fc, req);
842 spin_lock(&fuse_lock); 811 spin_lock(&fuse_lock);
843 } 812 }
844} 813}
845 814
815/*
816 * Abort requests under I/O
817 *
818 * The requests are set to interrupted and finished, and the request
819 * waiter is woken up. This will make request_wait_answer() wait
820 * until the request is unlocked and then return.
821 *
822 * If the request is asynchronous, then the end function needs to be
823 * called after waiting for the request to be unlocked (if it was
824 * locked).
825 */
826static void end_io_requests(struct fuse_conn *fc)
827{
828 while (!list_empty(&fc->io)) {
829 struct fuse_req *req =
830 list_entry(fc->io.next, struct fuse_req, list);
831 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
832
833 req->interrupted = 1;
834 req->out.h.error = -ECONNABORTED;
835 req->state = FUSE_REQ_FINISHED;
836 list_del_init(&req->list);
837 wake_up(&req->waitq);
838 if (end) {
839 req->end = NULL;
840 /* The end function will consume this reference */
841 __fuse_get_request(req);
842 spin_unlock(&fuse_lock);
843 wait_event(req->waitq, !req->locked);
844 end(fc, req);
845 spin_lock(&fuse_lock);
846 }
847 }
848}
849
850/*
851 * Abort all requests.
852 *
853 * Emergency exit in case of a malicious or accidental deadlock, or
854 * just a hung filesystem.
855 *
856 * The same effect is usually achievable through killing the
857 * filesystem daemon and all users of the filesystem. The exception
858 * is the combination of an asynchronous request and the tricky
859 * deadlock (see Documentation/filesystems/fuse.txt).
860 *
861 * During the aborting, progression of requests from the pending and
862 * processing lists onto the io list, and progression of new requests
863 * onto the pending list is prevented by req->connected being false.
864 *
865 * Progression of requests under I/O to the processing list is
866 * prevented by the req->interrupted flag being true for these
867 * requests. For this reason requests on the io list must be aborted
868 * first.
869 */
870void fuse_abort_conn(struct fuse_conn *fc)
871{
872 spin_lock(&fuse_lock);
873 if (fc->connected) {
874 fc->connected = 0;
875 end_io_requests(fc);
876 end_requests(fc, &fc->pending);
877 end_requests(fc, &fc->processing);
878 wake_up_all(&fc->waitq);
879 }
880 spin_unlock(&fuse_lock);
881}
882
846static int fuse_dev_release(struct inode *inode, struct file *file) 883static int fuse_dev_release(struct inode *inode, struct file *file)
847{ 884{
848 struct fuse_conn *fc; 885 struct fuse_conn *fc;
@@ -853,9 +890,11 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
853 fc->connected = 0; 890 fc->connected = 0;
854 end_requests(fc, &fc->pending); 891 end_requests(fc, &fc->pending);
855 end_requests(fc, &fc->processing); 892 end_requests(fc, &fc->processing);
856 fuse_release_conn(fc);
857 } 893 }
858 spin_unlock(&fuse_lock); 894 spin_unlock(&fuse_lock);
895 if (fc)
896 kobject_put(&fc->kobj);
897
859 return 0; 898 return 0;
860} 899}
861 900
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 417bcee466f6..21fd59c7bc24 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -23,8 +23,7 @@
23/* 23/*
24 * Calculate the time in jiffies until a dentry/attributes are valid 24 * Calculate the time in jiffies until a dentry/attributes are valid
25 */ 25 */
26static inline unsigned long time_to_jiffies(unsigned long sec, 26static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
27 unsigned long nsec)
28{ 27{
29 struct timespec ts = {sec, nsec}; 28 struct timespec ts = {sec, nsec};
30 return jiffies + timespec_to_jiffies(&ts); 29 return jiffies + timespec_to_jiffies(&ts);
@@ -157,7 +156,7 @@ static int dir_alias(struct inode *inode)
157 return 0; 156 return 0;
158} 157}
159 158
160static inline int invalid_nodeid(u64 nodeid) 159static int invalid_nodeid(u64 nodeid)
161{ 160{
162 return !nodeid || nodeid == FUSE_ROOT_ID; 161 return !nodeid || nodeid == FUSE_ROOT_ID;
163} 162}
@@ -166,7 +165,7 @@ static struct dentry_operations fuse_dentry_operations = {
166 .d_revalidate = fuse_dentry_revalidate, 165 .d_revalidate = fuse_dentry_revalidate,
167}; 166};
168 167
169static inline int valid_mode(int m) 168static int valid_mode(int m)
170{ 169{
171 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || 170 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
172 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); 171 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
@@ -763,13 +762,6 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
763 return 0; 762 return 0;
764} 763}
765 764
766static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
767 struct inode *inode, loff_t pos,
768 size_t count)
769{
770 return fuse_send_read_common(req, file, inode, pos, count, 1);
771}
772
773static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) 765static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
774{ 766{
775 int err; 767 int err;
@@ -793,7 +785,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
793 } 785 }
794 req->num_pages = 1; 786 req->num_pages = 1;
795 req->pages[0] = page; 787 req->pages[0] = page;
796 nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE); 788 fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
789 request_send(fc, req);
790 nbytes = req->out.args[0].size;
797 err = req->out.h.error; 791 err = req->out.h.error;
798 fuse_put_request(fc, req); 792 fuse_put_request(fc, req);
799 if (!err) 793 if (!err)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 05dedddf4289..a7ef5e716f3c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -113,6 +113,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
113 return err; 113 return err;
114} 114}
115 115
116/* Special case for failed iget in CREATE */
117static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
118{
119 u64 nodeid = req->in.h.nodeid;
120 fuse_reset_request(req);
121 fuse_send_forget(fc, req, nodeid, 1);
122}
123
116void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, 124void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
117 u64 nodeid, struct inode *inode, int flags, int isdir) 125 u64 nodeid, struct inode *inode, int flags, int isdir)
118{ 126{
@@ -128,6 +136,8 @@ void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
128 req->in.args[0].size = sizeof(struct fuse_release_in); 136 req->in.args[0].size = sizeof(struct fuse_release_in);
129 req->in.args[0].value = inarg; 137 req->in.args[0].value = inarg;
130 request_send_background(fc, req); 138 request_send_background(fc, req);
139 if (!inode)
140 req->end = fuse_release_end;
131 kfree(ff); 141 kfree(ff);
132} 142}
133 143
@@ -240,38 +250,35 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
240 return fuse_fsync_common(file, de, datasync, 0); 250 return fuse_fsync_common(file, de, datasync, 0);
241} 251}
242 252
243size_t fuse_send_read_common(struct fuse_req *req, struct file *file, 253void fuse_read_fill(struct fuse_req *req, struct file *file,
244 struct inode *inode, loff_t pos, size_t count, 254 struct inode *inode, loff_t pos, size_t count, int opcode)
245 int isdir)
246{ 255{
247 struct fuse_conn *fc = get_fuse_conn(inode);
248 struct fuse_file *ff = file->private_data; 256 struct fuse_file *ff = file->private_data;
249 struct fuse_read_in inarg; 257 struct fuse_read_in *inarg = &req->misc.read_in;
250 258
251 memset(&inarg, 0, sizeof(struct fuse_read_in)); 259 inarg->fh = ff->fh;
252 inarg.fh = ff->fh; 260 inarg->offset = pos;
253 inarg.offset = pos; 261 inarg->size = count;
254 inarg.size = count; 262 req->in.h.opcode = opcode;
255 req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
256 req->in.h.nodeid = get_node_id(inode); 263 req->in.h.nodeid = get_node_id(inode);
257 req->inode = inode; 264 req->inode = inode;
258 req->file = file; 265 req->file = file;
259 req->in.numargs = 1; 266 req->in.numargs = 1;
260 req->in.args[0].size = sizeof(struct fuse_read_in); 267 req->in.args[0].size = sizeof(struct fuse_read_in);
261 req->in.args[0].value = &inarg; 268 req->in.args[0].value = inarg;
262 req->out.argpages = 1; 269 req->out.argpages = 1;
263 req->out.argvar = 1; 270 req->out.argvar = 1;
264 req->out.numargs = 1; 271 req->out.numargs = 1;
265 req->out.args[0].size = count; 272 req->out.args[0].size = count;
266 request_send(fc, req);
267 return req->out.args[0].size;
268} 273}
269 274
270static inline size_t fuse_send_read(struct fuse_req *req, struct file *file, 275static size_t fuse_send_read(struct fuse_req *req, struct file *file,
271 struct inode *inode, loff_t pos, 276 struct inode *inode, loff_t pos, size_t count)
272 size_t count)
273{ 277{
274 return fuse_send_read_common(req, file, inode, pos, count, 0); 278 struct fuse_conn *fc = get_fuse_conn(inode);
279 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
280 request_send(fc, req);
281 return req->out.args[0].size;
275} 282}
276 283
277static int fuse_readpage(struct file *file, struct page *page) 284static int fuse_readpage(struct file *file, struct page *page)
@@ -304,21 +311,33 @@ static int fuse_readpage(struct file *file, struct page *page)
304 return err; 311 return err;
305} 312}
306 313
307static int fuse_send_readpages(struct fuse_req *req, struct file *file, 314static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
308 struct inode *inode)
309{ 315{
310 loff_t pos = page_offset(req->pages[0]); 316 int i;
311 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 317
312 unsigned i; 318 fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */
313 req->out.page_zeroing = 1; 319
314 fuse_send_read(req, file, inode, pos, count);
315 for (i = 0; i < req->num_pages; i++) { 320 for (i = 0; i < req->num_pages; i++) {
316 struct page *page = req->pages[i]; 321 struct page *page = req->pages[i];
317 if (!req->out.h.error) 322 if (!req->out.h.error)
318 SetPageUptodate(page); 323 SetPageUptodate(page);
324 else
325 SetPageError(page);
319 unlock_page(page); 326 unlock_page(page);
320 } 327 }
321 return req->out.h.error; 328 fuse_put_request(fc, req);
329}
330
331static void fuse_send_readpages(struct fuse_req *req, struct file *file,
332 struct inode *inode)
333{
334 struct fuse_conn *fc = get_fuse_conn(inode);
335 loff_t pos = page_offset(req->pages[0]);
336 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
337 req->out.page_zeroing = 1;
338 req->end = fuse_readpages_end;
339 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
340 request_send_background(fc, req);
322} 341}
323 342
324struct fuse_readpages_data { 343struct fuse_readpages_data {
@@ -338,12 +357,12 @@ static int fuse_readpages_fill(void *_data, struct page *page)
338 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 357 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
339 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 358 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
340 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 359 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
341 int err = fuse_send_readpages(req, data->file, inode); 360 fuse_send_readpages(req, data->file, inode);
342 if (err) { 361 data->req = req = fuse_get_request(fc);
362 if (!req) {
343 unlock_page(page); 363 unlock_page(page);
344 return err; 364 return -EINTR;
345 } 365 }
346 fuse_reset_request(req);
347 } 366 }
348 req->pages[req->num_pages] = page; 367 req->pages[req->num_pages] = page;
349 req->num_pages ++; 368 req->num_pages ++;
@@ -368,10 +387,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
368 return -EINTR; 387 return -EINTR;
369 388
370 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 389 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
371 if (!err && data.req->num_pages) 390 if (!err)
372 err = fuse_send_readpages(data.req, file, inode); 391 fuse_send_readpages(data.req, file, inode);
373 fuse_put_request(fc, data.req);
374 fuse_invalidate_attr(inode); /* atime changed */
375 return err; 392 return err;
376} 393}
377 394
@@ -560,9 +577,9 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
560 struct inode *inode = file->f_dentry->d_inode; 577 struct inode *inode = file->f_dentry->d_inode;
561 ssize_t res; 578 ssize_t res;
562 /* Don't allow parallel writes to the same file */ 579 /* Don't allow parallel writes to the same file */
563 down(&inode->i_sem); 580 mutex_lock(&inode->i_mutex);
564 res = fuse_direct_io(file, buf, count, ppos, 1); 581 res = fuse_direct_io(file, buf, count, ppos, 1);
565 up(&inode->i_sem); 582 mutex_unlock(&inode->i_mutex);
566 return res; 583 return res;
567} 584}
568 585
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 74c8d098a14a..46cf933aa3bf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -94,6 +94,11 @@ struct fuse_out {
94 /** Header returned from userspace */ 94 /** Header returned from userspace */
95 struct fuse_out_header h; 95 struct fuse_out_header h;
96 96
97 /*
98 * The following bitfields are not changed during the request
99 * processing
100 */
101
97 /** Last argument is variable length (can be shorter than 102 /** Last argument is variable length (can be shorter than
98 arg->size) */ 103 arg->size) */
99 unsigned argvar:1; 104 unsigned argvar:1;
@@ -111,12 +116,23 @@ struct fuse_out {
111 struct fuse_arg args[3]; 116 struct fuse_arg args[3];
112}; 117};
113 118
119/** The request state */
120enum fuse_req_state {
121 FUSE_REQ_INIT = 0,
122 FUSE_REQ_PENDING,
123 FUSE_REQ_READING,
124 FUSE_REQ_SENT,
125 FUSE_REQ_FINISHED
126};
127
128struct fuse_conn;
129
114/** 130/**
115 * A request to the client 131 * A request to the client
116 */ 132 */
117struct fuse_req { 133struct fuse_req {
118 /** This can be on either unused_list, pending or processing 134 /** This can be on either unused_list, pending processing or
119 lists in fuse_conn */ 135 io lists in fuse_conn */
120 struct list_head list; 136 struct list_head list;
121 137
122 /** Entry on the background list */ 138 /** Entry on the background list */
@@ -125,6 +141,12 @@ struct fuse_req {
125 /** refcount */ 141 /** refcount */
126 atomic_t count; 142 atomic_t count;
127 143
144 /*
145 * The following bitfields are either set once before the
146 * request is queued or setting/clearing them is protected by
147 * fuse_lock
148 */
149
128 /** True if the request has reply */ 150 /** True if the request has reply */
129 unsigned isreply:1; 151 unsigned isreply:1;
130 152
@@ -140,11 +162,8 @@ struct fuse_req {
140 /** Data is being copied to/from the request */ 162 /** Data is being copied to/from the request */
141 unsigned locked:1; 163 unsigned locked:1;
142 164
143 /** Request has been sent to userspace */ 165 /** State of the request */
144 unsigned sent:1; 166 enum fuse_req_state state;
145
146 /** The request is finished */
147 unsigned finished:1;
148 167
149 /** The request input */ 168 /** The request input */
150 struct fuse_in in; 169 struct fuse_in in;
@@ -161,6 +180,7 @@ struct fuse_req {
161 struct fuse_release_in release_in; 180 struct fuse_release_in release_in;
162 struct fuse_init_in init_in; 181 struct fuse_init_in init_in;
163 struct fuse_init_out init_out; 182 struct fuse_init_out init_out;
183 struct fuse_read_in read_in;
164 } misc; 184 } misc;
165 185
166 /** page vector */ 186 /** page vector */
@@ -180,6 +200,9 @@ struct fuse_req {
180 200
181 /** File used in the request (or NULL) */ 201 /** File used in the request (or NULL) */
182 struct file *file; 202 struct file *file;
203
204 /** Request completion callback */
205 void (*end)(struct fuse_conn *, struct fuse_req *);
183}; 206};
184 207
185/** 208/**
@@ -190,9 +213,6 @@ struct fuse_req {
190 * unmounted. 213 * unmounted.
191 */ 214 */
192struct fuse_conn { 215struct fuse_conn {
193 /** Reference count */
194 int count;
195
196 /** The user id for this mount */ 216 /** The user id for this mount */
197 uid_t user_id; 217 uid_t user_id;
198 218
@@ -217,6 +237,9 @@ struct fuse_conn {
217 /** The list of requests being processed */ 237 /** The list of requests being processed */
218 struct list_head processing; 238 struct list_head processing;
219 239
240 /** The list of requests under I/O */
241 struct list_head io;
242
220 /** Requests put in the background (RELEASE or any other 243 /** Requests put in the background (RELEASE or any other
221 interrupted request) */ 244 interrupted request) */
222 struct list_head background; 245 struct list_head background;
@@ -238,14 +261,22 @@ struct fuse_conn {
238 u64 reqctr; 261 u64 reqctr;
239 262
240 /** Mount is active */ 263 /** Mount is active */
241 unsigned mounted : 1; 264 unsigned mounted;
242 265
243 /** Connection established */ 266 /** Connection established, cleared on umount, connection
244 unsigned connected : 1; 267 abort and device release */
268 unsigned connected;
245 269
246 /** Connection failed (version mismatch) */ 270 /** Connection failed (version mismatch). Cannot race with
271 setting other bitfields since it is only set once in INIT
272 reply, before any other request, and never cleared */
247 unsigned conn_error : 1; 273 unsigned conn_error : 1;
248 274
275 /*
276 * The following bitfields are only for optimization purposes
277 * and hence races in setting them will not cause malfunction
278 */
279
249 /** Is fsync not implemented by fs? */ 280 /** Is fsync not implemented by fs? */
250 unsigned no_fsync : 1; 281 unsigned no_fsync : 1;
251 282
@@ -273,21 +304,22 @@ struct fuse_conn {
273 /** Is create not implemented by fs? */ 304 /** Is create not implemented by fs? */
274 unsigned no_create : 1; 305 unsigned no_create : 1;
275 306
307 /** The number of requests waiting for completion */
308 atomic_t num_waiting;
309
276 /** Negotiated minor version */ 310 /** Negotiated minor version */
277 unsigned minor; 311 unsigned minor;
278 312
279 /** Backing dev info */ 313 /** Backing dev info */
280 struct backing_dev_info bdi; 314 struct backing_dev_info bdi;
281};
282 315
283static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb) 316 /** kobject */
284{ 317 struct kobject kobj;
285 return (struct fuse_conn **) &sb->s_fs_info; 318};
286}
287 319
288static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 320static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
289{ 321{
290 return *get_fuse_conn_super_p(sb); 322 return sb->s_fs_info;
291} 323}
292 324
293static inline struct fuse_conn *get_fuse_conn(struct inode *inode) 325static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
@@ -295,6 +327,11 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
295 return get_fuse_conn_super(inode->i_sb); 327 return get_fuse_conn_super(inode->i_sb);
296} 328}
297 329
330static inline struct fuse_conn *get_fuse_conn_kobj(struct kobject *obj)
331{
332 return container_of(obj, struct fuse_conn, kobj);
333}
334
298static inline struct fuse_inode *get_fuse_inode(struct inode *inode) 335static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
299{ 336{
300 return container_of(inode, struct fuse_inode, inode); 337 return container_of(inode, struct fuse_inode, inode);
@@ -336,11 +373,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
336 unsigned long nodeid, u64 nlookup); 373 unsigned long nodeid, u64 nlookup);
337 374
338/** 375/**
339 * Send READ or READDIR request 376 * Initialize READ or READDIR request
340 */ 377 */
341size_t fuse_send_read_common(struct fuse_req *req, struct file *file, 378void fuse_read_fill(struct fuse_req *req, struct file *file,
342 struct inode *inode, loff_t pos, size_t count, 379 struct inode *inode, loff_t pos, size_t count, int opcode);
343 int isdir);
344 380
345/** 381/**
346 * Send OPEN or OPENDIR request 382 * Send OPEN or OPENDIR request
@@ -395,12 +431,6 @@ void fuse_init_symlink(struct inode *inode);
395void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr); 431void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
396 432
397/** 433/**
398 * Check if the connection can be released, and if yes, then free the
399 * connection structure
400 */
401void fuse_release_conn(struct fuse_conn *fc);
402
403/**
404 * Initialize the client device 434 * Initialize the client device
405 */ 435 */
406int fuse_dev_init(void); 436int fuse_dev_init(void);
@@ -456,6 +486,9 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
456 */ 486 */
457void fuse_release_background(struct fuse_req *req); 487void fuse_release_background(struct fuse_req *req);
458 488
489/* Abort all requests */
490void fuse_abort_conn(struct fuse_conn *fc);
491
459/** 492/**
460 * Get the attributes of a file 493 * Get the attributes of a file
461 */ 494 */
@@ -465,8 +498,3 @@ int fuse_do_getattr(struct inode *inode);
465 * Invalidate inode attributes 498 * Invalidate inode attributes
466 */ 499 */
467void fuse_invalidate_attr(struct inode *inode); 500void fuse_invalidate_attr(struct inode *inode);
468
469/**
470 * Send the INIT message
471 */
472void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 04c80cc957a3..c755a0440a66 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -24,6 +24,13 @@ MODULE_LICENSE("GPL");
24 24
25spinlock_t fuse_lock; 25spinlock_t fuse_lock;
26static kmem_cache_t *fuse_inode_cachep; 26static kmem_cache_t *fuse_inode_cachep;
27static struct subsystem connections_subsys;
28
29struct fuse_conn_attr {
30 struct attribute attr;
31 ssize_t (*show)(struct fuse_conn *, char *);
32 ssize_t (*store)(struct fuse_conn *, const char *, size_t);
33};
27 34
28#define FUSE_SUPER_MAGIC 0x65735546 35#define FUSE_SUPER_MAGIC 0x65735546
29 36
@@ -189,6 +196,11 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
189 return inode; 196 return inode;
190} 197}
191 198
199static void fuse_umount_begin(struct super_block *sb)
200{
201 fuse_abort_conn(get_fuse_conn_super(sb));
202}
203
192static void fuse_put_super(struct super_block *sb) 204static void fuse_put_super(struct super_block *sb)
193{ 205{
194 struct fuse_conn *fc = get_fuse_conn_super(sb); 206 struct fuse_conn *fc = get_fuse_conn_super(sb);
@@ -200,14 +212,13 @@ static void fuse_put_super(struct super_block *sb)
200 212
201 spin_lock(&fuse_lock); 213 spin_lock(&fuse_lock);
202 fc->mounted = 0; 214 fc->mounted = 0;
203 fc->user_id = 0; 215 fc->connected = 0;
204 fc->group_id = 0; 216 spin_unlock(&fuse_lock);
205 fc->flags = 0; 217 up_write(&fc->sbput_sem);
206 /* Flush all readers on this fs */ 218 /* Flush all readers on this fs */
207 wake_up_all(&fc->waitq); 219 wake_up_all(&fc->waitq);
208 up_write(&fc->sbput_sem); 220 kobject_del(&fc->kobj);
209 fuse_release_conn(fc); 221 kobject_put(&fc->kobj);
210 spin_unlock(&fuse_lock);
211} 222}
212 223
213static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) 224static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
@@ -356,8 +367,10 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
356 return 0; 367 return 0;
357} 368}
358 369
359static void free_conn(struct fuse_conn *fc) 370static void fuse_conn_release(struct kobject *kobj)
360{ 371{
372 struct fuse_conn *fc = get_fuse_conn_kobj(kobj);
373
361 while (!list_empty(&fc->unused_list)) { 374 while (!list_empty(&fc->unused_list)) {
362 struct fuse_req *req; 375 struct fuse_req *req;
363 req = list_entry(fc->unused_list.next, struct fuse_req, list); 376 req = list_entry(fc->unused_list.next, struct fuse_req, list);
@@ -367,33 +380,28 @@ static void free_conn(struct fuse_conn *fc)
367 kfree(fc); 380 kfree(fc);
368} 381}
369 382
370/* Must be called with the fuse lock held */
371void fuse_release_conn(struct fuse_conn *fc)
372{
373 fc->count--;
374 if (!fc->count)
375 free_conn(fc);
376}
377
378static struct fuse_conn *new_conn(void) 383static struct fuse_conn *new_conn(void)
379{ 384{
380 struct fuse_conn *fc; 385 struct fuse_conn *fc;
381 386
382 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 387 fc = kzalloc(sizeof(*fc), GFP_KERNEL);
383 if (fc != NULL) { 388 if (fc) {
384 int i; 389 int i;
385 memset(fc, 0, sizeof(*fc));
386 init_waitqueue_head(&fc->waitq); 390 init_waitqueue_head(&fc->waitq);
387 INIT_LIST_HEAD(&fc->pending); 391 INIT_LIST_HEAD(&fc->pending);
388 INIT_LIST_HEAD(&fc->processing); 392 INIT_LIST_HEAD(&fc->processing);
393 INIT_LIST_HEAD(&fc->io);
389 INIT_LIST_HEAD(&fc->unused_list); 394 INIT_LIST_HEAD(&fc->unused_list);
390 INIT_LIST_HEAD(&fc->background); 395 INIT_LIST_HEAD(&fc->background);
391 sema_init(&fc->outstanding_sem, 0); 396 sema_init(&fc->outstanding_sem, 1); /* One for INIT */
392 init_rwsem(&fc->sbput_sem); 397 init_rwsem(&fc->sbput_sem);
398 kobj_set_kset_s(fc, connections_subsys);
399 kobject_init(&fc->kobj);
400 atomic_set(&fc->num_waiting, 0);
393 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) { 401 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
394 struct fuse_req *req = fuse_request_alloc(); 402 struct fuse_req *req = fuse_request_alloc();
395 if (!req) { 403 if (!req) {
396 free_conn(fc); 404 kobject_put(&fc->kobj);
397 return NULL; 405 return NULL;
398 } 406 }
399 list_add(&req->list, &fc->unused_list); 407 list_add(&req->list, &fc->unused_list);
@@ -408,25 +416,32 @@ static struct fuse_conn *new_conn(void)
408static struct fuse_conn *get_conn(struct file *file, struct super_block *sb) 416static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
409{ 417{
410 struct fuse_conn *fc; 418 struct fuse_conn *fc;
419 int err;
411 420
421 err = -EINVAL;
412 if (file->f_op != &fuse_dev_operations) 422 if (file->f_op != &fuse_dev_operations)
413 return ERR_PTR(-EINVAL); 423 goto out_err;
424
425 err = -ENOMEM;
414 fc = new_conn(); 426 fc = new_conn();
415 if (fc == NULL) 427 if (!fc)
416 return ERR_PTR(-ENOMEM); 428 goto out_err;
429
417 spin_lock(&fuse_lock); 430 spin_lock(&fuse_lock);
418 if (file->private_data) { 431 err = -EINVAL;
419 free_conn(fc); 432 if (file->private_data)
420 fc = ERR_PTR(-EINVAL); 433 goto out_unlock;
421 } else { 434
422 file->private_data = fc; 435 kobject_get(&fc->kobj);
423 *get_fuse_conn_super_p(sb) = fc; 436 file->private_data = fc;
424 fc->mounted = 1;
425 fc->connected = 1;
426 fc->count = 2;
427 }
428 spin_unlock(&fuse_lock); 437 spin_unlock(&fuse_lock);
429 return fc; 438 return fc;
439
440 out_unlock:
441 spin_unlock(&fuse_lock);
442 kobject_put(&fc->kobj);
443 out_err:
444 return ERR_PTR(err);
430} 445}
431 446
432static struct inode *get_root_inode(struct super_block *sb, unsigned mode) 447static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
@@ -445,16 +460,74 @@ static struct super_operations fuse_super_operations = {
445 .read_inode = fuse_read_inode, 460 .read_inode = fuse_read_inode,
446 .clear_inode = fuse_clear_inode, 461 .clear_inode = fuse_clear_inode,
447 .put_super = fuse_put_super, 462 .put_super = fuse_put_super,
463 .umount_begin = fuse_umount_begin,
448 .statfs = fuse_statfs, 464 .statfs = fuse_statfs,
449 .show_options = fuse_show_options, 465 .show_options = fuse_show_options,
450}; 466};
451 467
468static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
469{
470 int i;
471 struct fuse_init_out *arg = &req->misc.init_out;
472
473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
474 fc->conn_error = 1;
475 else {
476 fc->minor = arg->minor;
477 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
478 }
479
480 /* After INIT reply is received other requests can go
481 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
482 up()s on outstanding_sem. The last up() is done in
483 fuse_putback_request() */
484 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
485 up(&fc->outstanding_sem);
486
487 fuse_put_request(fc, req);
488}
489
490static void fuse_send_init(struct fuse_conn *fc)
491{
492 /* This is called from fuse_read_super() so there's guaranteed
493 to be exactly one request available */
494 struct fuse_req *req = fuse_get_request(fc);
495 struct fuse_init_in *arg = &req->misc.init_in;
496
497 arg->major = FUSE_KERNEL_VERSION;
498 arg->minor = FUSE_KERNEL_MINOR_VERSION;
499 req->in.h.opcode = FUSE_INIT;
500 req->in.numargs = 1;
501 req->in.args[0].size = sizeof(*arg);
502 req->in.args[0].value = arg;
503 req->out.numargs = 1;
504 /* Variable length arguement used for backward compatibility
505 with interface version < 7.5. Rest of init_out is zeroed
506 by do_get_request(), so a short reply is not a problem */
507 req->out.argvar = 1;
508 req->out.args[0].size = sizeof(struct fuse_init_out);
509 req->out.args[0].value = &req->misc.init_out;
510 req->end = process_init_reply;
511 request_send_background(fc, req);
512}
513
514static unsigned long long conn_id(void)
515{
516 static unsigned long long ctr = 1;
517 unsigned long long val;
518 spin_lock(&fuse_lock);
519 val = ctr++;
520 spin_unlock(&fuse_lock);
521 return val;
522}
523
452static int fuse_fill_super(struct super_block *sb, void *data, int silent) 524static int fuse_fill_super(struct super_block *sb, void *data, int silent)
453{ 525{
454 struct fuse_conn *fc; 526 struct fuse_conn *fc;
455 struct inode *root; 527 struct inode *root;
456 struct fuse_mount_data d; 528 struct fuse_mount_data d;
457 struct file *file; 529 struct file *file;
530 struct dentry *root_dentry;
458 int err; 531 int err;
459 532
460 if (!parse_fuse_opt((char *) data, &d)) 533 if (!parse_fuse_opt((char *) data, &d))
@@ -482,23 +555,42 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
482 if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages) 555 if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
483 fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; 556 fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
484 557
558 /* Used by get_root_inode() */
559 sb->s_fs_info = fc;
560
485 err = -ENOMEM; 561 err = -ENOMEM;
486 root = get_root_inode(sb, d.rootmode); 562 root = get_root_inode(sb, d.rootmode);
487 if (root == NULL) 563 if (!root)
488 goto err; 564 goto err;
489 565
490 sb->s_root = d_alloc_root(root); 566 root_dentry = d_alloc_root(root);
491 if (!sb->s_root) { 567 if (!root_dentry) {
492 iput(root); 568 iput(root);
493 goto err; 569 goto err;
494 } 570 }
571
572 err = kobject_set_name(&fc->kobj, "%llu", conn_id());
573 if (err)
574 goto err_put_root;
575
576 err = kobject_add(&fc->kobj);
577 if (err)
578 goto err_put_root;
579
580 sb->s_root = root_dentry;
581 spin_lock(&fuse_lock);
582 fc->mounted = 1;
583 fc->connected = 1;
584 spin_unlock(&fuse_lock);
585
495 fuse_send_init(fc); 586 fuse_send_init(fc);
587
496 return 0; 588 return 0;
497 589
590 err_put_root:
591 dput(root_dentry);
498 err: 592 err:
499 spin_lock(&fuse_lock); 593 kobject_put(&fc->kobj);
500 fuse_release_conn(fc);
501 spin_unlock(&fuse_lock);
502 return err; 594 return err;
503} 595}
504 596
@@ -516,6 +608,69 @@ static struct file_system_type fuse_fs_type = {
516 .kill_sb = kill_anon_super, 608 .kill_sb = kill_anon_super,
517}; 609};
518 610
611static ssize_t fuse_conn_waiting_show(struct fuse_conn *fc, char *page)
612{
613 return sprintf(page, "%i\n", atomic_read(&fc->num_waiting));
614}
615
616static ssize_t fuse_conn_abort_store(struct fuse_conn *fc, const char *page,
617 size_t count)
618{
619 fuse_abort_conn(fc);
620 return count;
621}
622
623static struct fuse_conn_attr fuse_conn_waiting =
624 __ATTR(waiting, 0400, fuse_conn_waiting_show, NULL);
625static struct fuse_conn_attr fuse_conn_abort =
626 __ATTR(abort, 0600, NULL, fuse_conn_abort_store);
627
628static struct attribute *fuse_conn_attrs[] = {
629 &fuse_conn_waiting.attr,
630 &fuse_conn_abort.attr,
631 NULL,
632};
633
634static ssize_t fuse_conn_attr_show(struct kobject *kobj,
635 struct attribute *attr,
636 char *page)
637{
638 struct fuse_conn_attr *fca =
639 container_of(attr, struct fuse_conn_attr, attr);
640
641 if (fca->show)
642 return fca->show(get_fuse_conn_kobj(kobj), page);
643 else
644 return -EACCES;
645}
646
647static ssize_t fuse_conn_attr_store(struct kobject *kobj,
648 struct attribute *attr,
649 const char *page, size_t count)
650{
651 struct fuse_conn_attr *fca =
652 container_of(attr, struct fuse_conn_attr, attr);
653
654 if (fca->store)
655 return fca->store(get_fuse_conn_kobj(kobj), page, count);
656 else
657 return -EACCES;
658}
659
660static struct sysfs_ops fuse_conn_sysfs_ops = {
661 .show = &fuse_conn_attr_show,
662 .store = &fuse_conn_attr_store,
663};
664
665static struct kobj_type ktype_fuse_conn = {
666 .release = fuse_conn_release,
667 .sysfs_ops = &fuse_conn_sysfs_ops,
668 .default_attrs = fuse_conn_attrs,
669};
670
671static decl_subsys(fuse, NULL, NULL);
672static decl_subsys(connections, &ktype_fuse_conn, NULL);
673
519static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, 674static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
520 unsigned long flags) 675 unsigned long flags)
521{ 676{
@@ -553,6 +708,34 @@ static void fuse_fs_cleanup(void)
553 kmem_cache_destroy(fuse_inode_cachep); 708 kmem_cache_destroy(fuse_inode_cachep);
554} 709}
555 710
711static int fuse_sysfs_init(void)
712{
713 int err;
714
715 kset_set_kset_s(&fuse_subsys, fs_subsys);
716 err = subsystem_register(&fuse_subsys);
717 if (err)
718 goto out_err;
719
720 kset_set_kset_s(&connections_subsys, fuse_subsys);
721 err = subsystem_register(&connections_subsys);
722 if (err)
723 goto out_fuse_unregister;
724
725 return 0;
726
727 out_fuse_unregister:
728 subsystem_unregister(&fuse_subsys);
729 out_err:
730 return err;
731}
732
733static void fuse_sysfs_cleanup(void)
734{
735 subsystem_unregister(&connections_subsys);
736 subsystem_unregister(&fuse_subsys);
737}
738
556static int __init fuse_init(void) 739static int __init fuse_init(void)
557{ 740{
558 int res; 741 int res;
@@ -569,8 +752,14 @@ static int __init fuse_init(void)
569 if (res) 752 if (res)
570 goto err_fs_cleanup; 753 goto err_fs_cleanup;
571 754
755 res = fuse_sysfs_init();
756 if (res)
757 goto err_dev_cleanup;
758
572 return 0; 759 return 0;
573 760
761 err_dev_cleanup:
762 fuse_dev_cleanup();
574 err_fs_cleanup: 763 err_fs_cleanup:
575 fuse_fs_cleanup(); 764 fuse_fs_cleanup();
576 err: 765 err:
@@ -581,6 +770,7 @@ static void __exit fuse_exit(void)
581{ 770{
582 printk(KERN_DEBUG "fuse exit\n"); 771 printk(KERN_DEBUG "fuse exit\n");
583 772
773 fuse_sysfs_cleanup();
584 fuse_fs_cleanup(); 774 fuse_fs_cleanup();
585 fuse_dev_cleanup(); 775 fuse_dev_cleanup();
586} 776}
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index 89450ae32228..f13f1494d4fe 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -64,7 +64,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
64 else 64 else
65 e = rec - 1; 65 e = rec - 1;
66 } while (b <= e); 66 } while (b <= e);
67 //printk("%d: %d,%d,%d\n", bnode->this, b, e, rec);
68 if (rec != e && e >= 0) { 67 if (rec != e && e >= 0) {
69 len = hfs_brec_lenoff(bnode, e, &off); 68 len = hfs_brec_lenoff(bnode, e, &off);
70 keylen = hfs_brec_keylen(bnode, e); 69 keylen = hfs_brec_keylen(bnode, e);
@@ -127,7 +126,7 @@ int hfs_brec_find(struct hfs_find_data *fd)
127 return res; 126 return res;
128 127
129invalid: 128invalid:
130 printk("HFS: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", 129 printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n",
131 height, bnode->height, bnode->type, nidx, parent); 130 height, bnode->height, bnode->type, nidx, parent);
132 res = -EIO; 131 res = -EIO;
133release: 132release:
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 3d5cdc6847c0..a7a7d77f3fd3 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -198,7 +198,7 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
198 198
199 // move down? 199 // move down?
200 if (!node->prev && !node->next) { 200 if (!node->prev && !node->next) {
201 printk("hfs_btree_del_level\n"); 201 printk(KERN_DEBUG "hfs_btree_del_level\n");
202 } 202 }
203 if (!node->parent) { 203 if (!node->parent) {
204 tree->root = 0; 204 tree->root = 0;
@@ -219,7 +219,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
219 struct hfs_bnode *node; 219 struct hfs_bnode *node;
220 220
221 if (cnid >= tree->node_count) { 221 if (cnid >= tree->node_count) {
222 printk("HFS: request for non-existent node %d in B*Tree\n", cnid); 222 printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
223 return NULL; 223 return NULL;
224 } 224 }
225 225
@@ -242,7 +242,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
242 loff_t off; 242 loff_t off;
243 243
244 if (cnid >= tree->node_count) { 244 if (cnid >= tree->node_count) {
245 printk("HFS: request for non-existent node %d in B*Tree\n", cnid); 245 printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
246 return NULL; 246 return NULL;
247 } 247 }
248 248
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 7d8fff2c25fc..5c87cf4801fc 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -362,7 +362,7 @@ again:
362 end_off = hfs_bnode_read_u16(parent, end_rec_off); 362 end_off = hfs_bnode_read_u16(parent, end_rec_off);
363 if (end_rec_off - end_off < diff) { 363 if (end_rec_off - end_off < diff) {
364 364
365 printk("splitting index node...\n"); 365 printk(KERN_DEBUG "hfs: splitting index node...\n");
366 fd->bnode = parent; 366 fd->bnode = parent;
367 new_node = hfs_bnode_split(fd); 367 new_node = hfs_bnode_split(fd);
368 if (IS_ERR(new_node)) 368 if (IS_ERR(new_node))
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 394725efa1c8..7bb11edd1488 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -111,7 +111,7 @@ void hfs_btree_close(struct hfs_btree *tree)
111 while ((node = tree->node_hash[i])) { 111 while ((node = tree->node_hash[i])) {
112 tree->node_hash[i] = node->next_hash; 112 tree->node_hash[i] = node->next_hash;
113 if (atomic_read(&node->refcnt)) 113 if (atomic_read(&node->refcnt))
114 printk("HFS: node %d:%d still has %d user(s)!\n", 114 printk(KERN_ERR "hfs: node %d:%d still has %d user(s)!\n",
115 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 115 node->tree->cnid, node->this, atomic_read(&node->refcnt));
116 hfs_bnode_free(node); 116 hfs_bnode_free(node);
117 tree->node_hash_cnt--; 117 tree->node_hash_cnt--;
@@ -252,7 +252,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
252 kunmap(*pagep); 252 kunmap(*pagep);
253 nidx = node->next; 253 nidx = node->next;
254 if (!nidx) { 254 if (!nidx) {
255 printk("create new bmap node...\n"); 255 printk(KERN_DEBUG "hfs: create new bmap node...\n");
256 next_node = hfs_bmap_new_bmap(node, idx); 256 next_node = hfs_bmap_new_bmap(node, idx);
257 } else 257 } else
258 next_node = hfs_bnode_find(tree, nidx); 258 next_node = hfs_bnode_find(tree, nidx);
@@ -292,7 +292,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
292 hfs_bnode_put(node); 292 hfs_bnode_put(node);
293 if (!i) { 293 if (!i) {
294 /* panic */; 294 /* panic */;
295 printk("HFS: unable to free bnode %u. bmap not found!\n", node->this); 295 printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this);
296 return; 296 return;
297 } 297 }
298 node = hfs_bnode_find(tree, i); 298 node = hfs_bnode_find(tree, i);
@@ -300,7 +300,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
300 return; 300 return;
301 if (node->type != HFS_NODE_MAP) { 301 if (node->type != HFS_NODE_MAP) {
302 /* panic */; 302 /* panic */;
303 printk("HFS: invalid bmap found! (%u,%d)\n", node->this, node->type); 303 printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type);
304 hfs_bnode_put(node); 304 hfs_bnode_put(node);
305 return; 305 return;
306 } 306 }
@@ -313,7 +313,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
313 m = 1 << (~nidx & 7); 313 m = 1 << (~nidx & 7);
314 byte = data[off]; 314 byte = data[off];
315 if (!(byte & m)) { 315 if (!(byte & m)) {
316 printk("HFS: trying to free free bnode %u(%d)\n", node->this, node->type); 316 printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type);
317 kunmap(page); 317 kunmap(page);
318 hfs_bnode_put(node); 318 hfs_bnode_put(node);
319 return; 319 return;
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index 2fcd679f0238..ba851576ebb1 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -184,7 +184,7 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid,
184 184
185 type = rec.type; 185 type = rec.type;
186 if (type != HFS_CDR_THD && type != HFS_CDR_FTH) { 186 if (type != HFS_CDR_THD && type != HFS_CDR_FTH) {
187 printk("HFS-fs: Found bad thread record in catalog\n"); 187 printk(KERN_ERR "hfs: found bad thread record in catalog\n");
188 return -EIO; 188 return -EIO;
189 } 189 }
190 190
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index e1f24befba58..534e5a7480ef 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -81,12 +81,12 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
81 case 1: 81 case 1:
82 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 82 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
83 if (entry.type != HFS_CDR_THD) { 83 if (entry.type != HFS_CDR_THD) {
84 printk("HFS: bad catalog folder thread\n"); 84 printk(KERN_ERR "hfs: bad catalog folder thread\n");
85 err = -EIO; 85 err = -EIO;
86 goto out; 86 goto out;
87 } 87 }
88 //if (fd.entrylength < HFS_MIN_THREAD_SZ) { 88 //if (fd.entrylength < HFS_MIN_THREAD_SZ) {
89 // printk("HFS: truncated catalog thread\n"); 89 // printk(KERN_ERR "hfs: truncated catalog thread\n");
90 // err = -EIO; 90 // err = -EIO;
91 // goto out; 91 // goto out;
92 //} 92 //}
@@ -105,7 +105,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
105 105
106 for (;;) { 106 for (;;) {
107 if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) { 107 if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) {
108 printk("HFS: walked past end of dir\n"); 108 printk(KERN_ERR "hfs: walked past end of dir\n");
109 err = -EIO; 109 err = -EIO;
110 goto out; 110 goto out;
111 } 111 }
@@ -114,7 +114,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
114 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); 114 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName);
115 if (type == HFS_CDR_DIR) { 115 if (type == HFS_CDR_DIR) {
116 if (fd.entrylength < sizeof(struct hfs_cat_dir)) { 116 if (fd.entrylength < sizeof(struct hfs_cat_dir)) {
117 printk("HFS: small dir entry\n"); 117 printk(KERN_ERR "hfs: small dir entry\n");
118 err = -EIO; 118 err = -EIO;
119 goto out; 119 goto out;
120 } 120 }
@@ -123,7 +123,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
123 break; 123 break;
124 } else if (type == HFS_CDR_FIL) { 124 } else if (type == HFS_CDR_FIL) {
125 if (fd.entrylength < sizeof(struct hfs_cat_file)) { 125 if (fd.entrylength < sizeof(struct hfs_cat_file)) {
126 printk("HFS: small file entry\n"); 126 printk(KERN_ERR "hfs: small file entry\n");
127 err = -EIO; 127 err = -EIO;
128 goto out; 128 goto out;
129 } 129 }
@@ -131,7 +131,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
131 be32_to_cpu(entry.file.FlNum), DT_REG)) 131 be32_to_cpu(entry.file.FlNum), DT_REG))
132 break; 132 break;
133 } else { 133 } else {
134 printk("HFS: bad catalog entry type %d\n", type); 134 printk(KERN_ERR "hfs: bad catalog entry type %d\n", type);
135 err = -EIO; 135 err = -EIO;
136 goto out; 136 goto out;
137 } 137 }
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index cc5dcd52e23d..18ce47ab1b71 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -35,9 +35,6 @@
35#define dprint(flg, fmt, args...) \ 35#define dprint(flg, fmt, args...) \
36 if (flg & DBG_MASK) printk(fmt , ## args) 36 if (flg & DBG_MASK) printk(fmt , ## args)
37 37
38#define hfs_warn(format, args...) printk(KERN_WARNING format , ## args)
39#define hfs_error(format, args...) printk(KERN_ERR format , ## args)
40
41/* 38/*
42 * struct hfs_inode_info 39 * struct hfs_inode_info
43 * 40 *
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d499393a8ae7..39fd85b9b916 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -95,7 +95,6 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
95 } while (--i && nidx < tree->node_count); 95 } while (--i && nidx < tree->node_count);
96 spin_unlock(&tree->hash_lock); 96 spin_unlock(&tree->hash_lock);
97 } 97 }
98 //printk("releasepage: %lu,%x = %d\n", page->index, mask, res);
99 return res ? try_to_free_buffers(page) : 0; 98 return res ? try_to_free_buffers(page) : 0;
100} 99}
101 100
@@ -547,13 +546,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
547 if (atomic_read(&file->f_count) != 0) 546 if (atomic_read(&file->f_count) != 0)
548 return 0; 547 return 0;
549 if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { 548 if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
550 down(&inode->i_sem); 549 mutex_lock(&inode->i_mutex);
551 hfs_file_truncate(inode); 550 hfs_file_truncate(inode);
552 //if (inode->i_flags & S_DEAD) { 551 //if (inode->i_flags & S_DEAD) {
553 // hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); 552 // hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
554 // hfs_delete_inode(inode); 553 // hfs_delete_inode(inode);
555 //} 554 //}
556 up(&inode->i_sem); 555 mutex_unlock(&inode->i_mutex);
557 } 556 }
558 return 0; 557 return 0;
559} 558}
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 0a473f79c89f..b4651e128d7f 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -47,7 +47,7 @@ static int hfs_get_last_session(struct super_block *sb,
47 *start = (sector_t)te.cdte_addr.lba << 2; 47 *start = (sector_t)te.cdte_addr.lba << 2;
48 return 0; 48 return 0;
49 } 49 }
50 printk(KERN_ERR "HFS: Invalid session number or type of track\n"); 50 printk(KERN_ERR "hfs: invalid session number or type of track\n");
51 return -EINVAL; 51 return -EINVAL;
52 } 52 }
53 ms_info.addr_format = CDROM_LBA; 53 ms_info.addr_format = CDROM_LBA;
@@ -100,7 +100,7 @@ int hfs_mdb_get(struct super_block *sb)
100 100
101 HFS_SB(sb)->alloc_blksz = size = be32_to_cpu(mdb->drAlBlkSiz); 101 HFS_SB(sb)->alloc_blksz = size = be32_to_cpu(mdb->drAlBlkSiz);
102 if (!size || (size & (HFS_SECTOR_SIZE - 1))) { 102 if (!size || (size & (HFS_SECTOR_SIZE - 1))) {
103 hfs_warn("hfs_fs: bad allocation block size %d\n", size); 103 printk(KERN_ERR "hfs: bad allocation block size %d\n", size);
104 goto out_bh; 104 goto out_bh;
105 } 105 }
106 106
@@ -117,7 +117,7 @@ int hfs_mdb_get(struct super_block *sb)
117 size >>= 1; 117 size >>= 1;
118 brelse(bh); 118 brelse(bh);
119 if (!sb_set_blocksize(sb, size)) { 119 if (!sb_set_blocksize(sb, size)) {
120 printk("hfs_fs: unable to set blocksize to %u\n", size); 120 printk(KERN_ERR "hfs: unable to set blocksize to %u\n", size);
121 goto out; 121 goto out;
122 } 122 }
123 123
@@ -161,8 +161,8 @@ int hfs_mdb_get(struct super_block *sb)
161 } 161 }
162 162
163 if (!HFS_SB(sb)->alt_mdb) { 163 if (!HFS_SB(sb)->alt_mdb) {
164 hfs_warn("hfs_fs: unable to locate alternate MDB\n"); 164 printk(KERN_WARNING "hfs: unable to locate alternate MDB\n");
165 hfs_warn("hfs_fs: continuing without an alternate MDB\n"); 165 printk(KERN_WARNING "hfs: continuing without an alternate MDB\n");
166 } 166 }
167 167
168 HFS_SB(sb)->bitmap = (__be32 *)__get_free_pages(GFP_KERNEL, PAGE_SIZE < 8192 ? 1 : 0); 168 HFS_SB(sb)->bitmap = (__be32 *)__get_free_pages(GFP_KERNEL, PAGE_SIZE < 8192 ? 1 : 0);
@@ -177,7 +177,7 @@ int hfs_mdb_get(struct super_block *sb)
177 while (size) { 177 while (size) {
178 bh = sb_bread(sb, off >> sb->s_blocksize_bits); 178 bh = sb_bread(sb, off >> sb->s_blocksize_bits);
179 if (!bh) { 179 if (!bh) {
180 hfs_warn("hfs_fs: unable to read volume bitmap\n"); 180 printk(KERN_ERR "hfs: unable to read volume bitmap\n");
181 goto out; 181 goto out;
182 } 182 }
183 off2 = off & (sb->s_blocksize - 1); 183 off2 = off & (sb->s_blocksize - 1);
@@ -191,23 +191,23 @@ int hfs_mdb_get(struct super_block *sb)
191 191
192 HFS_SB(sb)->ext_tree = hfs_btree_open(sb, HFS_EXT_CNID, hfs_ext_keycmp); 192 HFS_SB(sb)->ext_tree = hfs_btree_open(sb, HFS_EXT_CNID, hfs_ext_keycmp);
193 if (!HFS_SB(sb)->ext_tree) { 193 if (!HFS_SB(sb)->ext_tree) {
194 hfs_warn("hfs_fs: unable to open extent tree\n"); 194 printk(KERN_ERR "hfs: unable to open extent tree\n");
195 goto out; 195 goto out;
196 } 196 }
197 HFS_SB(sb)->cat_tree = hfs_btree_open(sb, HFS_CAT_CNID, hfs_cat_keycmp); 197 HFS_SB(sb)->cat_tree = hfs_btree_open(sb, HFS_CAT_CNID, hfs_cat_keycmp);
198 if (!HFS_SB(sb)->cat_tree) { 198 if (!HFS_SB(sb)->cat_tree) {
199 hfs_warn("hfs_fs: unable to open catalog tree\n"); 199 printk(KERN_ERR "hfs: unable to open catalog tree\n");
200 goto out; 200 goto out;
201 } 201 }
202 202
203 attrib = mdb->drAtrb; 203 attrib = mdb->drAtrb;
204 if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { 204 if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
205 hfs_warn("HFS-fs warning: Filesystem was not cleanly unmounted, " 205 printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, "
206 "running fsck.hfs is recommended. mounting read-only.\n"); 206 "running fsck.hfs is recommended. mounting read-only.\n");
207 sb->s_flags |= MS_RDONLY; 207 sb->s_flags |= MS_RDONLY;
208 } 208 }
209 if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) { 209 if ((attrib & cpu_to_be16(HFS_SB_ATTRIB_SLOCK))) {
210 hfs_warn("HFS-fs: Filesystem is marked locked, mounting read-only.\n"); 210 printk(KERN_WARNING "hfs: filesystem is marked locked, mounting read-only.\n");
211 sb->s_flags |= MS_RDONLY; 211 sb->s_flags |= MS_RDONLY;
212 } 212 }
213 if (!(sb->s_flags & MS_RDONLY)) { 213 if (!(sb->s_flags & MS_RDONLY)) {
@@ -303,7 +303,7 @@ void hfs_mdb_commit(struct super_block *sb)
303 while (size) { 303 while (size) {
304 bh = sb_bread(sb, block); 304 bh = sb_bread(sb, block);
305 if (!bh) { 305 if (!bh) {
306 hfs_warn("hfs_fs: unable to read volume bitmap\n"); 306 printk(KERN_ERR "hfs: unable to read volume bitmap\n");
307 break; 307 break;
308 } 308 }
309 len = min((int)sb->s_blocksize - off, size); 309 len = min((int)sb->s_blocksize - off, size);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index c5074aeafcae..1181d116117d 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -101,12 +101,12 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
101 return 0; 101 return 0;
102 if (!(*flags & MS_RDONLY)) { 102 if (!(*flags & MS_RDONLY)) {
103 if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) { 103 if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
104 printk("HFS-fs warning: Filesystem was not cleanly unmounted, " 104 printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, "
105 "running fsck.hfs is recommended. leaving read-only.\n"); 105 "running fsck.hfs is recommended. leaving read-only.\n");
106 sb->s_flags |= MS_RDONLY; 106 sb->s_flags |= MS_RDONLY;
107 *flags |= MS_RDONLY; 107 *flags |= MS_RDONLY;
108 } else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) { 108 } else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) {
109 printk("HFS-fs: Filesystem is marked locked, leaving read-only.\n"); 109 printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n");
110 sb->s_flags |= MS_RDONLY; 110 sb->s_flags |= MS_RDONLY;
111 *flags |= MS_RDONLY; 111 *flags |= MS_RDONLY;
112 } 112 }
@@ -229,21 +229,21 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
229 switch (token) { 229 switch (token) {
230 case opt_uid: 230 case opt_uid:
231 if (match_int(&args[0], &tmp)) { 231 if (match_int(&args[0], &tmp)) {
232 printk("HFS: uid requires an argument\n"); 232 printk(KERN_ERR "hfs: uid requires an argument\n");
233 return 0; 233 return 0;
234 } 234 }
235 hsb->s_uid = (uid_t)tmp; 235 hsb->s_uid = (uid_t)tmp;
236 break; 236 break;
237 case opt_gid: 237 case opt_gid:
238 if (match_int(&args[0], &tmp)) { 238 if (match_int(&args[0], &tmp)) {
239 printk("HFS: gid requires an argument\n"); 239 printk(KERN_ERR "hfs: gid requires an argument\n");
240 return 0; 240 return 0;
241 } 241 }
242 hsb->s_gid = (gid_t)tmp; 242 hsb->s_gid = (gid_t)tmp;
243 break; 243 break;
244 case opt_umask: 244 case opt_umask:
245 if (match_octal(&args[0], &tmp)) { 245 if (match_octal(&args[0], &tmp)) {
246 printk("HFS: umask requires a value\n"); 246 printk(KERN_ERR "hfs: umask requires a value\n");
247 return 0; 247 return 0;
248 } 248 }
249 hsb->s_file_umask = (umode_t)tmp; 249 hsb->s_file_umask = (umode_t)tmp;
@@ -251,39 +251,39 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
251 break; 251 break;
252 case opt_file_umask: 252 case opt_file_umask:
253 if (match_octal(&args[0], &tmp)) { 253 if (match_octal(&args[0], &tmp)) {
254 printk("HFS: file_umask requires a value\n"); 254 printk(KERN_ERR "hfs: file_umask requires a value\n");
255 return 0; 255 return 0;
256 } 256 }
257 hsb->s_file_umask = (umode_t)tmp; 257 hsb->s_file_umask = (umode_t)tmp;
258 break; 258 break;
259 case opt_dir_umask: 259 case opt_dir_umask:
260 if (match_octal(&args[0], &tmp)) { 260 if (match_octal(&args[0], &tmp)) {
261 printk("HFS: dir_umask requires a value\n"); 261 printk(KERN_ERR "hfs: dir_umask requires a value\n");
262 return 0; 262 return 0;
263 } 263 }
264 hsb->s_dir_umask = (umode_t)tmp; 264 hsb->s_dir_umask = (umode_t)tmp;
265 break; 265 break;
266 case opt_part: 266 case opt_part:
267 if (match_int(&args[0], &hsb->part)) { 267 if (match_int(&args[0], &hsb->part)) {
268 printk("HFS: part requires an argument\n"); 268 printk(KERN_ERR "hfs: part requires an argument\n");
269 return 0; 269 return 0;
270 } 270 }
271 break; 271 break;
272 case opt_session: 272 case opt_session:
273 if (match_int(&args[0], &hsb->session)) { 273 if (match_int(&args[0], &hsb->session)) {
274 printk("HFS: session requires an argument\n"); 274 printk(KERN_ERR "hfs: session requires an argument\n");
275 return 0; 275 return 0;
276 } 276 }
277 break; 277 break;
278 case opt_type: 278 case opt_type:
279 if (match_fourchar(&args[0], &hsb->s_type)) { 279 if (match_fourchar(&args[0], &hsb->s_type)) {
280 printk("HFS+-fs: type requires a 4 character value\n"); 280 printk(KERN_ERR "hfs: type requires a 4 character value\n");
281 return 0; 281 return 0;
282 } 282 }
283 break; 283 break;
284 case opt_creator: 284 case opt_creator:
285 if (match_fourchar(&args[0], &hsb->s_creator)) { 285 if (match_fourchar(&args[0], &hsb->s_creator)) {
286 printk("HFS+-fs: creator requires a 4 character value\n"); 286 printk(KERN_ERR "hfs: creator requires a 4 character value\n");
287 return 0; 287 return 0;
288 } 288 }
289 break; 289 break;
@@ -292,13 +292,13 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
292 break; 292 break;
293 case opt_codepage: 293 case opt_codepage:
294 if (hsb->nls_disk) { 294 if (hsb->nls_disk) {
295 printk("HFS+-fs: unable to change codepage\n"); 295 printk(KERN_ERR "hfs: unable to change codepage\n");
296 return 0; 296 return 0;
297 } 297 }
298 p = match_strdup(&args[0]); 298 p = match_strdup(&args[0]);
299 hsb->nls_disk = load_nls(p); 299 hsb->nls_disk = load_nls(p);
300 if (!hsb->nls_disk) { 300 if (!hsb->nls_disk) {
301 printk("HFS+-fs: unable to load codepage \"%s\"\n", p); 301 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p);
302 kfree(p); 302 kfree(p);
303 return 0; 303 return 0;
304 } 304 }
@@ -306,13 +306,13 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
306 break; 306 break;
307 case opt_iocharset: 307 case opt_iocharset:
308 if (hsb->nls_io) { 308 if (hsb->nls_io) {
309 printk("HFS: unable to change iocharset\n"); 309 printk(KERN_ERR "hfs: unable to change iocharset\n");
310 return 0; 310 return 0;
311 } 311 }
312 p = match_strdup(&args[0]); 312 p = match_strdup(&args[0]);
313 hsb->nls_io = load_nls(p); 313 hsb->nls_io = load_nls(p);
314 if (!hsb->nls_io) { 314 if (!hsb->nls_io) {
315 printk("HFS: unable to load iocharset \"%s\"\n", p); 315 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p);
316 kfree(p); 316 kfree(p);
317 return 0; 317 return 0;
318 } 318 }
@@ -326,7 +326,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
326 if (hsb->nls_disk && !hsb->nls_io) { 326 if (hsb->nls_disk && !hsb->nls_io) {
327 hsb->nls_io = load_nls_default(); 327 hsb->nls_io = load_nls_default();
328 if (!hsb->nls_io) { 328 if (!hsb->nls_io) {
329 printk("HFS: unable to load default iocharset\n"); 329 printk(KERN_ERR "hfs: unable to load default iocharset\n");
330 return 0; 330 return 0;
331 } 331 }
332 } 332 }
@@ -364,7 +364,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
364 364
365 res = -EINVAL; 365 res = -EINVAL;
366 if (!parse_options((char *)data, sbi)) { 366 if (!parse_options((char *)data, sbi)) {
367 hfs_warn("hfs_fs: unable to parse mount options.\n"); 367 printk(KERN_ERR "hfs: unable to parse mount options.\n");
368 goto bail; 368 goto bail;
369 } 369 }
370 370
@@ -375,7 +375,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
375 res = hfs_mdb_get(sb); 375 res = hfs_mdb_get(sb);
376 if (res) { 376 if (res) {
377 if (!silent) 377 if (!silent)
378 hfs_warn("VFS: Can't find a HFS filesystem on dev %s.\n", 378 printk(KERN_WARNING "hfs: can't find a HFS filesystem on dev %s.\n",
379 hfs_mdb_name(sb)); 379 hfs_mdb_name(sb));
380 res = -EINVAL; 380 res = -EINVAL;
381 goto bail; 381 goto bail;
@@ -407,7 +407,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
407bail_iput: 407bail_iput:
408 iput(root_inode); 408 iput(root_inode);
409bail_no_root: 409bail_no_root:
410 hfs_warn("hfs_fs: get root inode failed.\n"); 410 printk(KERN_ERR "hfs: get root inode failed.\n");
411bail: 411bail:
412 hfs_mdb_put(sb); 412 hfs_mdb_put(sb);
413 return res; 413 return res;
@@ -454,7 +454,7 @@ static void __exit exit_hfs_fs(void)
454{ 454{
455 unregister_filesystem(&hfs_fs_type); 455 unregister_filesystem(&hfs_fs_type);
456 if (kmem_cache_destroy(hfs_inode_cachep)) 456 if (kmem_cache_destroy(hfs_inode_cachep))
457 printk(KERN_INFO "hfs_inode_cache: not all structures were freed\n"); 457 printk(KERN_ERR "hfs_inode_cache: not all structures were freed\n");
458} 458}
459 459
460module_init(init_hfs_fs) 460module_init(init_hfs_fs)
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index 257cdde0514b..5007a41f1be9 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -64,7 +64,6 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
64 else 64 else
65 e = rec - 1; 65 e = rec - 1;
66 } while (b <= e); 66 } while (b <= e);
67 //printk("%d: %d,%d,%d\n", bnode->this, b, e, rec);
68 if (rec != e && e >= 0) { 67 if (rec != e && e >= 0) {
69 len = hfs_brec_lenoff(bnode, e, &off); 68 len = hfs_brec_lenoff(bnode, e, &off);
70 keylen = hfs_brec_keylen(bnode, e); 69 keylen = hfs_brec_keylen(bnode, e);
@@ -127,7 +126,7 @@ int hfs_brec_find(struct hfs_find_data *fd)
127 return res; 126 return res;
128 127
129invalid: 128invalid:
130 printk("HFS+-fs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n", 129 printk(KERN_ERR "hfs: inconsistency in B*Tree (%d,%d,%d,%u,%u)\n",
131 height, bnode->height, bnode->type, nidx, parent); 130 height, bnode->height, bnode->type, nidx, parent);
132 res = -EIO; 131 res = -EIO;
133release: 132release:
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index c7d316455fa0..9fb51632303c 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -29,7 +29,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma
29 return size; 29 return size;
30 30
31 dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); 31 dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len);
32 down(&HFSPLUS_SB(sb).alloc_file->i_sem); 32 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
33 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; 33 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
34 page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, 34 page = read_cache_page(mapping, offset / PAGE_CACHE_BITS,
35 (filler_t *)mapping->a_ops->readpage, NULL); 35 (filler_t *)mapping->a_ops->readpage, NULL);
@@ -143,7 +143,7 @@ done:
143 sb->s_dirt = 1; 143 sb->s_dirt = 1;
144 dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); 144 dprint(DBG_BITMAP, "-> %u,%u\n", start, *max);
145out: 145out:
146 up(&HFSPLUS_SB(sb).alloc_file->i_sem); 146 mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
147 return start; 147 return start;
148} 148}
149 149
@@ -164,7 +164,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count)
164 if ((offset + count) > HFSPLUS_SB(sb).total_blocks) 164 if ((offset + count) > HFSPLUS_SB(sb).total_blocks)
165 return -2; 165 return -2;
166 166
167 down(&HFSPLUS_SB(sb).alloc_file->i_sem); 167 mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
168 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; 168 mapping = HFSPLUS_SB(sb).alloc_file->i_mapping;
169 pnr = offset / PAGE_CACHE_BITS; 169 pnr = offset / PAGE_CACHE_BITS;
170 page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL); 170 page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL);
@@ -215,7 +215,7 @@ out:
215 kunmap(page); 215 kunmap(page);
216 HFSPLUS_SB(sb).free_blocks += len; 216 HFSPLUS_SB(sb).free_blocks += len;
217 sb->s_dirt = 1; 217 sb->s_dirt = 1;
218 up(&HFSPLUS_SB(sb).alloc_file->i_sem); 218 mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex);
219 219
220 return 0; 220 return 0;
221} 221}
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 930cd9212de8..8f07e8fbd03d 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -358,7 +358,7 @@ void hfs_bnode_unlink(struct hfs_bnode *node)
358 358
359 // move down? 359 // move down?
360 if (!node->prev && !node->next) { 360 if (!node->prev && !node->next) {
361 printk("hfs_btree_del_level\n"); 361 printk(KERN_DEBUG "hfs_btree_del_level\n");
362 } 362 }
363 if (!node->parent) { 363 if (!node->parent) {
364 tree->root = 0; 364 tree->root = 0;
@@ -379,7 +379,7 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid)
379 struct hfs_bnode *node; 379 struct hfs_bnode *node;
380 380
381 if (cnid >= tree->node_count) { 381 if (cnid >= tree->node_count) {
382 printk("HFS+-fs: request for non-existent node %d in B*Tree\n", cnid); 382 printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
383 return NULL; 383 return NULL;
384 } 384 }
385 385
@@ -402,7 +402,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
402 loff_t off; 402 loff_t off;
403 403
404 if (cnid >= tree->node_count) { 404 if (cnid >= tree->node_count) {
405 printk("HFS+-fs: request for non-existent node %d in B*Tree\n", cnid); 405 printk(KERN_ERR "hfs: request for non-existent node %d in B*Tree\n", cnid);
406 return NULL; 406 return NULL;
407 } 407 }
408 408
@@ -576,8 +576,9 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
576 node = hfs_bnode_findhash(tree, num); 576 node = hfs_bnode_findhash(tree, num);
577 spin_unlock(&tree->hash_lock); 577 spin_unlock(&tree->hash_lock);
578 if (node) { 578 if (node) {
579 printk("new node %u already hashed?\n", num); 579 printk(KERN_CRIT "new node %u already hashed?\n", num);
580 BUG(); 580 WARN_ON(1);
581 return node;
581 } 582 }
582 node = __hfs_bnode_create(tree, num); 583 node = __hfs_bnode_create(tree, num);
583 if (!node) 584 if (!node)
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 0ccef2ab790c..c88e5d72a402 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -360,7 +360,7 @@ again:
360 end_off = hfs_bnode_read_u16(parent, end_rec_off); 360 end_off = hfs_bnode_read_u16(parent, end_rec_off);
361 if (end_rec_off - end_off < diff) { 361 if (end_rec_off - end_off < diff) {
362 362
363 printk("splitting index node...\n"); 363 printk(KERN_DEBUG "hfs: splitting index node...\n");
364 fd->bnode = parent; 364 fd->bnode = parent;
365 new_node = hfs_bnode_split(fd); 365 new_node = hfs_bnode_split(fd);
366 if (IS_ERR(new_node)) 366 if (IS_ERR(new_node))
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 44326aa2bd34..a67edfa34e9e 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -31,17 +31,8 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
31 31
32 init_MUTEX(&tree->tree_lock); 32 init_MUTEX(&tree->tree_lock);
33 spin_lock_init(&tree->hash_lock); 33 spin_lock_init(&tree->hash_lock);
34 /* Set the correct compare function */
35 tree->sb = sb; 34 tree->sb = sb;
36 tree->cnid = id; 35 tree->cnid = id;
37 if (id == HFSPLUS_EXT_CNID) {
38 tree->keycmp = hfsplus_ext_cmp_key;
39 } else if (id == HFSPLUS_CAT_CNID) {
40 tree->keycmp = hfsplus_cat_cmp_key;
41 } else {
42 printk("HFS+-fs: unknown B*Tree requested\n");
43 goto free_tree;
44 }
45 tree->inode = iget(sb, id); 36 tree->inode = iget(sb, id);
46 if (!tree->inode) 37 if (!tree->inode)
47 goto free_tree; 38 goto free_tree;
@@ -64,6 +55,20 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
64 tree->max_key_len = be16_to_cpu(head->max_key_len); 55 tree->max_key_len = be16_to_cpu(head->max_key_len);
65 tree->depth = be16_to_cpu(head->depth); 56 tree->depth = be16_to_cpu(head->depth);
66 57
58 /* Set the correct compare function */
59 if (id == HFSPLUS_EXT_CNID) {
60 tree->keycmp = hfsplus_ext_cmp_key;
61 } else if (id == HFSPLUS_CAT_CNID) {
62 if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) &&
63 (head->key_type == HFSPLUS_KEY_BINARY))
64 tree->keycmp = hfsplus_cat_bin_cmp_key;
65 else
66 tree->keycmp = hfsplus_cat_case_cmp_key;
67 } else {
68 printk(KERN_ERR "hfs: unknown B*Tree requested\n");
69 goto fail_page;
70 }
71
67 size = tree->node_size; 72 size = tree->node_size;
68 if (!size || size & (size - 1)) 73 if (!size || size & (size - 1))
69 goto fail_page; 74 goto fail_page;
@@ -99,7 +104,7 @@ void hfs_btree_close(struct hfs_btree *tree)
99 while ((node = tree->node_hash[i])) { 104 while ((node = tree->node_hash[i])) {
100 tree->node_hash[i] = node->next_hash; 105 tree->node_hash[i] = node->next_hash;
101 if (atomic_read(&node->refcnt)) 106 if (atomic_read(&node->refcnt))
102 printk("HFS+: node %d:%d still has %d user(s)!\n", 107 printk(KERN_CRIT "hfs: node %d:%d still has %d user(s)!\n",
103 node->tree->cnid, node->this, atomic_read(&node->refcnt)); 108 node->tree->cnid, node->this, atomic_read(&node->refcnt));
104 hfs_bnode_free(node); 109 hfs_bnode_free(node);
105 tree->node_hash_cnt--; 110 tree->node_hash_cnt--;
@@ -223,10 +228,6 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
223 tree->free_nodes--; 228 tree->free_nodes--;
224 mark_inode_dirty(tree->inode); 229 mark_inode_dirty(tree->inode);
225 hfs_bnode_put(node); 230 hfs_bnode_put(node);
226 if (!idx) {
227 printk("unexpected idx %u (%u)\n", idx, node->this);
228 BUG();
229 }
230 return hfs_bnode_create(tree, idx); 231 return hfs_bnode_create(tree, idx);
231 } 232 }
232 } 233 }
@@ -242,7 +243,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
242 kunmap(*pagep); 243 kunmap(*pagep);
243 nidx = node->next; 244 nidx = node->next;
244 if (!nidx) { 245 if (!nidx) {
245 printk("create new bmap node...\n"); 246 printk(KERN_DEBUG "hfs: create new bmap node...\n");
246 next_node = hfs_bmap_new_bmap(node, idx); 247 next_node = hfs_bmap_new_bmap(node, idx);
247 } else 248 } else
248 next_node = hfs_bnode_find(tree, nidx); 249 next_node = hfs_bnode_find(tree, nidx);
@@ -284,7 +285,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
284 hfs_bnode_put(node); 285 hfs_bnode_put(node);
285 if (!i) { 286 if (!i) {
286 /* panic */; 287 /* panic */;
287 printk("HFS: unable to free bnode %u. bmap not found!\n", node->this); 288 printk(KERN_CRIT "hfs: unable to free bnode %u. bmap not found!\n", node->this);
288 return; 289 return;
289 } 290 }
290 node = hfs_bnode_find(tree, i); 291 node = hfs_bnode_find(tree, i);
@@ -292,7 +293,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
292 return; 293 return;
293 if (node->type != HFS_NODE_MAP) { 294 if (node->type != HFS_NODE_MAP) {
294 /* panic */; 295 /* panic */;
295 printk("HFS: invalid bmap found! (%u,%d)\n", node->this, node->type); 296 printk(KERN_CRIT "hfs: invalid bmap found! (%u,%d)\n", node->this, node->type);
296 hfs_bnode_put(node); 297 hfs_bnode_put(node);
297 return; 298 return;
298 } 299 }
@@ -305,7 +306,7 @@ void hfs_bmap_free(struct hfs_bnode *node)
305 m = 1 << (~nidx & 7); 306 m = 1 << (~nidx & 7);
306 byte = data[off]; 307 byte = data[off];
307 if (!(byte & m)) { 308 if (!(byte & m)) {
308 printk("HFS: trying to free free bnode %u(%d)\n", node->this, node->type); 309 printk(KERN_CRIT "hfs: trying to free free bnode %u(%d)\n", node->this, node->type);
309 kunmap(page); 310 kunmap(page);
310 hfs_bnode_put(node); 311 hfs_bnode_put(node);
311 return; 312 return;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 94712790c8b3..f2d7c49ce759 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -13,7 +13,8 @@
13#include "hfsplus_fs.h" 13#include "hfsplus_fs.h"
14#include "hfsplus_raw.h" 14#include "hfsplus_raw.h"
15 15
16int hfsplus_cat_cmp_key(hfsplus_btree_key *k1, hfsplus_btree_key *k2) 16int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
17 const hfsplus_btree_key *k2)
17{ 18{
18 __be32 k1p, k2p; 19 __be32 k1p, k2p;
19 20
@@ -22,7 +23,20 @@ int hfsplus_cat_cmp_key(hfsplus_btree_key *k1, hfsplus_btree_key *k2)
22 if (k1p != k2p) 23 if (k1p != k2p)
23 return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1; 24 return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1;
24 25
25 return hfsplus_unistrcmp(&k1->cat.name, &k2->cat.name); 26 return hfsplus_strcasecmp(&k1->cat.name, &k2->cat.name);
27}
28
29int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
30 const hfsplus_btree_key *k2)
31{
32 __be32 k1p, k2p;
33
34 k1p = k1->cat.parent;
35 k2p = k2->cat.parent;
36 if (k1p != k2p)
37 return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1;
38
39 return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
26} 40}
27 41
28void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, 42void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
@@ -80,8 +94,11 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
80 memset(folder, 0, sizeof(*folder)); 94 memset(folder, 0, sizeof(*folder));
81 folder->type = cpu_to_be16(HFSPLUS_FOLDER); 95 folder->type = cpu_to_be16(HFSPLUS_FOLDER);
82 folder->id = cpu_to_be32(inode->i_ino); 96 folder->id = cpu_to_be32(inode->i_ino);
83 folder->create_date = folder->content_mod_date = 97 HFSPLUS_I(inode).create_date =
84 folder->attribute_mod_date = folder->access_date = hfsp_now2mt(); 98 folder->create_date =
99 folder->content_mod_date =
100 folder->attribute_mod_date =
101 folder->access_date = hfsp_now2mt();
85 hfsplus_set_perms(inode, &folder->permissions); 102 hfsplus_set_perms(inode, &folder->permissions);
86 if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir) 103 if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir)
87 /* invisible and namelocked */ 104 /* invisible and namelocked */
@@ -95,18 +112,27 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i
95 file->type = cpu_to_be16(HFSPLUS_FILE); 112 file->type = cpu_to_be16(HFSPLUS_FILE);
96 file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); 113 file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS);
97 file->id = cpu_to_be32(cnid); 114 file->id = cpu_to_be32(cnid);
98 file->create_date = file->content_mod_date = 115 HFSPLUS_I(inode).create_date =
99 file->attribute_mod_date = file->access_date = hfsp_now2mt(); 116 file->create_date =
117 file->content_mod_date =
118 file->attribute_mod_date =
119 file->access_date = hfsp_now2mt();
100 if (cnid == inode->i_ino) { 120 if (cnid == inode->i_ino) {
101 hfsplus_set_perms(inode, &file->permissions); 121 hfsplus_set_perms(inode, &file->permissions);
102 file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type); 122 if (S_ISLNK(inode->i_mode)) {
103 file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator); 123 file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE);
124 file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR);
125 } else {
126 file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type);
127 file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator);
128 }
104 if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) 129 if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE)
105 file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); 130 file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED);
106 } else { 131 } else {
107 file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); 132 file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE);
108 file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); 133 file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR);
109 file->user_info.fdFlags = cpu_to_be16(0x100); 134 file->user_info.fdFlags = cpu_to_be16(0x100);
135 file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date;
110 file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev); 136 file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev);
111 } 137 }
112 return sizeof(*file); 138 return sizeof(*file);
@@ -139,7 +165,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
139 165
140 type = be16_to_cpu(tmp.type); 166 type = be16_to_cpu(tmp.type);
141 if (type != HFSPLUS_FOLDER_THREAD && type != HFSPLUS_FILE_THREAD) { 167 if (type != HFSPLUS_FOLDER_THREAD && type != HFSPLUS_FILE_THREAD) {
142 printk("HFS+-fs: Found bad thread record in catalog\n"); 168 printk(KERN_ERR "hfs: found bad thread record in catalog\n");
143 return -EIO; 169 return -EIO;
144 } 170 }
145 171
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 50c8f44b6c66..01a6fe3a395c 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -66,25 +66,32 @@ again:
66 } 66 }
67 cnid = be32_to_cpu(entry.file.id); 67 cnid = be32_to_cpu(entry.file.id);
68 if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && 68 if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) &&
69 entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR)) { 69 entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
70 (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date ||
71 entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date) &&
72 HFSPLUS_SB(sb).hidden_dir) {
70 struct qstr str; 73 struct qstr str;
71 char name[32]; 74 char name[32];
72 75
73 if (dentry->d_fsdata) { 76 if (dentry->d_fsdata) {
74 err = -ENOENT; 77 /*
75 inode = NULL; 78 * We found a link pointing to another link,
76 goto out; 79 * so ignore it and treat it as regular file.
80 */
81 cnid = (unsigned long)dentry->d_fsdata;
82 linkid = 0;
83 } else {
84 dentry->d_fsdata = (void *)(unsigned long)cnid;
85 linkid = be32_to_cpu(entry.file.permissions.dev);
86 str.len = sprintf(name, "iNode%d", linkid);
87 str.name = name;
88 hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str);
89 goto again;
77 } 90 }
78 dentry->d_fsdata = (void *)(unsigned long)cnid;
79 linkid = be32_to_cpu(entry.file.permissions.dev);
80 str.len = sprintf(name, "iNode%d", linkid);
81 str.name = name;
82 hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str);
83 goto again;
84 } else if (!dentry->d_fsdata) 91 } else if (!dentry->d_fsdata)
85 dentry->d_fsdata = (void *)(unsigned long)cnid; 92 dentry->d_fsdata = (void *)(unsigned long)cnid;
86 } else { 93 } else {
87 printk("HFS+-fs: Illegal catalog entry type in lookup\n"); 94 printk(KERN_ERR "hfs: invalid catalog entry type in lookup\n");
88 err = -EIO; 95 err = -EIO;
89 goto fail; 96 goto fail;
90 } 97 }
@@ -132,12 +139,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
132 case 1: 139 case 1:
133 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 140 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
134 if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { 141 if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
135 printk("HFS+-fs: bad catalog folder thread\n"); 142 printk(KERN_ERR "hfs: bad catalog folder thread\n");
136 err = -EIO; 143 err = -EIO;
137 goto out; 144 goto out;
138 } 145 }
139 if (fd.entrylength < HFSPLUS_MIN_THREAD_SZ) { 146 if (fd.entrylength < HFSPLUS_MIN_THREAD_SZ) {
140 printk("HFS+-fs: truncated catalog thread\n"); 147 printk(KERN_ERR "hfs: truncated catalog thread\n");
141 err = -EIO; 148 err = -EIO;
142 goto out; 149 goto out;
143 } 150 }
@@ -156,7 +163,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
156 163
157 for (;;) { 164 for (;;) {
158 if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) { 165 if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) {
159 printk("HFS+-fs: walked past end of dir\n"); 166 printk(KERN_ERR "hfs: walked past end of dir\n");
160 err = -EIO; 167 err = -EIO;
161 goto out; 168 goto out;
162 } 169 }
@@ -168,7 +175,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
168 goto out; 175 goto out;
169 if (type == HFSPLUS_FOLDER) { 176 if (type == HFSPLUS_FOLDER) {
170 if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) { 177 if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) {
171 printk("HFS+-fs: small dir entry\n"); 178 printk(KERN_ERR "hfs: small dir entry\n");
172 err = -EIO; 179 err = -EIO;
173 goto out; 180 goto out;
174 } 181 }
@@ -180,7 +187,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
180 break; 187 break;
181 } else if (type == HFSPLUS_FILE) { 188 } else if (type == HFSPLUS_FILE) {
182 if (fd.entrylength < sizeof(struct hfsplus_cat_file)) { 189 if (fd.entrylength < sizeof(struct hfsplus_cat_file)) {
183 printk("HFS+-fs: small file entry\n"); 190 printk(KERN_ERR "hfs: small file entry\n");
184 err = -EIO; 191 err = -EIO;
185 goto out; 192 goto out;
186 } 193 }
@@ -188,7 +195,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
188 be32_to_cpu(entry.file.id), DT_REG)) 195 be32_to_cpu(entry.file.id), DT_REG))
189 break; 196 break;
190 } else { 197 } else {
191 printk("HFS+-fs: bad catalog entry type\n"); 198 printk(KERN_ERR "hfs: bad catalog entry type\n");
192 err = -EIO; 199 err = -EIO;
193 goto out; 200 goto out;
194 } 201 }
@@ -330,7 +337,8 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
330 if (res) 337 if (res)
331 return res; 338 return res;
332 339
333 inode->i_nlink--; 340 if (inode->i_nlink > 0)
341 inode->i_nlink--;
334 hfsplus_delete_inode(inode); 342 hfsplus_delete_inode(inode);
335 if (inode->i_ino != cnid && !inode->i_nlink) { 343 if (inode->i_ino != cnid && !inode->i_nlink) {
336 if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { 344 if (!atomic_read(&HFSPLUS_I(inode).opencnt)) {
@@ -339,7 +347,8 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
339 hfsplus_delete_inode(inode); 347 hfsplus_delete_inode(inode);
340 } else 348 } else
341 inode->i_flags |= S_DEAD; 349 inode->i_flags |= S_DEAD;
342 } 350 } else
351 inode->i_nlink = 0;
343 inode->i_ctime = CURRENT_TIME_SEC; 352 inode->i_ctime = CURRENT_TIME_SEC;
344 mark_inode_dirty(inode); 353 mark_inode_dirty(inode);
345 354
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index e3ff56a03011..1a7480089e82 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -16,7 +16,8 @@
16#include "hfsplus_raw.h" 16#include "hfsplus_raw.h"
17 17
18/* Compare two extents keys, returns 0 on same, pos/neg for difference */ 18/* Compare two extents keys, returns 0 on same, pos/neg for difference */
19int hfsplus_ext_cmp_key(hfsplus_btree_key *k1, hfsplus_btree_key *k2) 19int hfsplus_ext_cmp_key(const hfsplus_btree_key *k1,
20 const hfsplus_btree_key *k2)
20{ 21{
21 __be32 k1id, k2id; 22 __be32 k1id, k2id;
22 __be32 k1s, k2s; 23 __be32 k1s, k2s;
@@ -349,10 +350,9 @@ int hfsplus_file_extend(struct inode *inode)
349 350
350 if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) { 351 if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) {
351 // extend alloc file 352 // extend alloc file
352 printk("extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8, 353 printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8,
353 HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks); 354 HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks);
354 return -ENOSPC; 355 return -ENOSPC;
355 //BUG();
356 } 356 }
357 357
358 down(&HFSPLUS_I(inode).extents_lock); 358 down(&HFSPLUS_I(inode).extents_lock);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index df16fcbff3fb..7ae393637a0c 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -36,7 +36,7 @@
36#define HFSPLUS_TYPE_DATA 0x00 36#define HFSPLUS_TYPE_DATA 0x00
37#define HFSPLUS_TYPE_RSRC 0xFF 37#define HFSPLUS_TYPE_RSRC 0xFF
38 38
39typedef int (*btree_keycmp)(hfsplus_btree_key *, hfsplus_btree_key *); 39typedef int (*btree_keycmp)(const hfsplus_btree_key *, const hfsplus_btree_key *);
40 40
41#define NODE_HASH_SIZE 256 41#define NODE_HASH_SIZE 256
42 42
@@ -143,15 +143,13 @@ struct hfsplus_sb_info {
143 143
144 unsigned long flags; 144 unsigned long flags;
145 145
146 atomic_t inode_cnt;
147 u32 last_inode_cnt;
148
149 struct hlist_head rsrc_inodes; 146 struct hlist_head rsrc_inodes;
150}; 147};
151 148
152#define HFSPLUS_SB_WRITEBACKUP 0x0001 149#define HFSPLUS_SB_WRITEBACKUP 0x0001
153#define HFSPLUS_SB_NODECOMPOSE 0x0002 150#define HFSPLUS_SB_NODECOMPOSE 0x0002
154#define HFSPLUS_SB_FORCE 0x0004 151#define HFSPLUS_SB_FORCE 0x0004
152#define HFSPLUS_SB_HFSX 0x0008
155 153
156 154
157struct hfsplus_inode_info { 155struct hfsplus_inode_info {
@@ -168,6 +166,7 @@ struct hfsplus_inode_info {
168 struct inode *rsrc_inode; 166 struct inode *rsrc_inode;
169 unsigned long flags; 167 unsigned long flags;
170 168
169 __be32 create_date;
171 /* Device number in hfsplus_permissions in catalog */ 170 /* Device number in hfsplus_permissions in catalog */
172 u32 dev; 171 u32 dev;
173 /* BSD system and user file flags */ 172 /* BSD system and user file flags */
@@ -306,7 +305,8 @@ int hfs_brec_read(struct hfs_find_data *, void *, int);
306int hfs_brec_goto(struct hfs_find_data *, int); 305int hfs_brec_goto(struct hfs_find_data *, int);
307 306
308/* catalog.c */ 307/* catalog.c */
309int hfsplus_cat_cmp_key(hfsplus_btree_key *, hfsplus_btree_key *); 308int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
309int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
310void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *, u32, struct qstr *); 310void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *, u32, struct qstr *);
311int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *); 311int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *);
312int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); 312int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *);
@@ -315,7 +315,7 @@ int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
315 struct inode *, struct qstr *); 315 struct inode *, struct qstr *);
316 316
317/* extents.c */ 317/* extents.c */
318int hfsplus_ext_cmp_key(hfsplus_btree_key *, hfsplus_btree_key *); 318int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
319void hfsplus_ext_write_extent(struct inode *); 319void hfsplus_ext_write_extent(struct inode *);
320int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); 320int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
321int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int); 321int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int);
@@ -353,7 +353,8 @@ extern u16 hfsplus_decompose_table[];
353extern u16 hfsplus_compose_table[]; 353extern u16 hfsplus_compose_table[];
354 354
355/* unicode.c */ 355/* unicode.c */
356int hfsplus_unistrcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *); 356int hfsplus_strcasecmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
357int hfsplus_strcmp(const struct hfsplus_unistr *, const struct hfsplus_unistr *);
357int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); 358int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *);
358int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int); 359int hfsplus_asc2uni(struct super_block *, struct hfsplus_unistr *, const char *, int);
359 360
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index b4fbed633219..49205531a500 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -22,8 +22,10 @@
22#define HFSPLUS_SECTOR_SHIFT 9 22#define HFSPLUS_SECTOR_SHIFT 9
23#define HFSPLUS_VOLHEAD_SECTOR 2 23#define HFSPLUS_VOLHEAD_SECTOR 2
24#define HFSPLUS_VOLHEAD_SIG 0x482b 24#define HFSPLUS_VOLHEAD_SIG 0x482b
25#define HFSPLUS_VOLHEAD_SIGX 0x4858
25#define HFSPLUS_SUPER_MAGIC 0x482b 26#define HFSPLUS_SUPER_MAGIC 0x482b
26#define HFSPLUS_CURRENT_VERSION 4 27#define HFSPLUS_MIN_VERSION 4
28#define HFSPLUS_CURRENT_VERSION 5
27 29
28#define HFSP_WRAP_MAGIC 0x4244 30#define HFSP_WRAP_MAGIC 0x4244
29#define HFSP_WRAP_ATTRIB_SLOCK 0x8000 31#define HFSP_WRAP_ATTRIB_SLOCK 0x8000
@@ -41,6 +43,9 @@
41#define HFSP_HARDLINK_TYPE 0x686c6e6b /* 'hlnk' */ 43#define HFSP_HARDLINK_TYPE 0x686c6e6b /* 'hlnk' */
42#define HFSP_HFSPLUS_CREATOR 0x6866732b /* 'hfs+' */ 44#define HFSP_HFSPLUS_CREATOR 0x6866732b /* 'hfs+' */
43 45
46#define HFSP_SYMLINK_TYPE 0x736c6e6b /* 'slnk' */
47#define HFSP_SYMLINK_CREATOR 0x72686170 /* 'rhap' */
48
44#define HFSP_MOUNT_VERSION 0x482b4c78 /* 'H+Lx' */ 49#define HFSP_MOUNT_VERSION 0x482b4c78 /* 'H+Lx' */
45 50
46/* Structures used on disk */ 51/* Structures used on disk */
@@ -161,7 +166,7 @@ struct hfs_btree_header_rec {
161 u16 reserved1; 166 u16 reserved1;
162 __be32 clump_size; 167 __be32 clump_size;
163 u8 btree_type; 168 u8 btree_type;
164 u8 reserved2; 169 u8 key_type;
165 __be32 attributes; 170 __be32 attributes;
166 u32 reserved3[16]; 171 u32 reserved3[16];
167} __packed; 172} __packed;
@@ -186,6 +191,10 @@ struct hfs_btree_header_rec {
186#define HFSPLUS_EXCH_CNID 15 /* ExchangeFiles temp id */ 191#define HFSPLUS_EXCH_CNID 15 /* ExchangeFiles temp id */
187#define HFSPLUS_FIRSTUSER_CNID 16 /* first available user id */ 192#define HFSPLUS_FIRSTUSER_CNID 16 /* first available user id */
188 193
194/* btree key type */
195#define HFSPLUS_KEY_CASEFOLDING 0xCF /* case-insensitive */
196#define HFSPLUS_KEY_BINARY 0xBC /* case-sensitive */
197
189/* HFS+ catalog entry key */ 198/* HFS+ catalog entry key */
190struct hfsplus_cat_key { 199struct hfsplus_cat_key {
191 __be16 key_len; 200 __be16 key_len;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index fc98583cf045..12ed2b7d046b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -18,13 +18,11 @@
18 18
19static int hfsplus_readpage(struct file *file, struct page *page) 19static int hfsplus_readpage(struct file *file, struct page *page)
20{ 20{
21 //printk("readpage: %lu\n", page->index);
22 return block_read_full_page(page, hfsplus_get_block); 21 return block_read_full_page(page, hfsplus_get_block);
23} 22}
24 23
25static int hfsplus_writepage(struct page *page, struct writeback_control *wbc) 24static int hfsplus_writepage(struct page *page, struct writeback_control *wbc)
26{ 25{
27 //printk("writepage: %lu\n", page->index);
28 return block_write_full_page(page, hfsplus_get_block, wbc); 26 return block_write_full_page(page, hfsplus_get_block, wbc);
29} 27}
30 28
@@ -92,7 +90,6 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
92 } while (--i && nidx < tree->node_count); 90 } while (--i && nidx < tree->node_count);
93 spin_unlock(&tree->hash_lock); 91 spin_unlock(&tree->hash_lock);
94 } 92 }
95 //printk("releasepage: %lu,%x = %d\n", page->index, mask, res);
96 return res ? try_to_free_buffers(page) : 0; 93 return res ? try_to_free_buffers(page) : 0;
97} 94}
98 95
@@ -182,11 +179,6 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
182 igrab(dir); 179 igrab(dir);
183 hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes); 180 hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes);
184 mark_inode_dirty(inode); 181 mark_inode_dirty(inode);
185 {
186 void hfsplus_inode_check(struct super_block *sb);
187 atomic_inc(&HFSPLUS_SB(sb).inode_cnt);
188 hfsplus_inode_check(sb);
189 }
190out: 182out:
191 d_add(dentry, inode); 183 d_add(dentry, inode);
192 return NULL; 184 return NULL;
@@ -276,13 +268,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
276 if (atomic_read(&file->f_count) != 0) 268 if (atomic_read(&file->f_count) != 0)
277 return 0; 269 return 0;
278 if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { 270 if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
279 down(&inode->i_sem); 271 mutex_lock(&inode->i_mutex);
280 hfsplus_file_truncate(inode); 272 hfsplus_file_truncate(inode);
281 if (inode->i_flags & S_DEAD) { 273 if (inode->i_flags & S_DEAD) {
282 hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); 274 hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
283 hfsplus_delete_inode(inode); 275 hfsplus_delete_inode(inode);
284 } 276 }
285 up(&inode->i_sem); 277 mutex_unlock(&inode->i_mutex);
286 } 278 }
287 return 0; 279 return 0;
288} 280}
@@ -317,11 +309,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
317 if (!inode) 309 if (!inode)
318 return NULL; 310 return NULL;
319 311
320 {
321 void hfsplus_inode_check(struct super_block *sb);
322 atomic_inc(&HFSPLUS_SB(sb).inode_cnt);
323 hfsplus_inode_check(sb);
324 }
325 inode->i_ino = HFSPLUS_SB(sb).next_cnid++; 312 inode->i_ino = HFSPLUS_SB(sb).next_cnid++;
326 inode->i_mode = mode; 313 inode->i_mode = mode;
327 inode->i_uid = current->fsuid; 314 inode->i_uid = current->fsuid;
@@ -444,7 +431,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
444 inode->i_size = 2 + be32_to_cpu(folder->valence); 431 inode->i_size = 2 + be32_to_cpu(folder->valence);
445 inode->i_atime = hfsp_mt2ut(folder->access_date); 432 inode->i_atime = hfsp_mt2ut(folder->access_date);
446 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); 433 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
447 inode->i_ctime = inode->i_mtime; 434 inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date);
435 HFSPLUS_I(inode).create_date = folder->create_date;
448 HFSPLUS_I(inode).fs_blocks = 0; 436 HFSPLUS_I(inode).fs_blocks = 0;
449 inode->i_op = &hfsplus_dir_inode_operations; 437 inode->i_op = &hfsplus_dir_inode_operations;
450 inode->i_fop = &hfsplus_dir_operations; 438 inode->i_fop = &hfsplus_dir_operations;
@@ -475,9 +463,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
475 } 463 }
476 inode->i_atime = hfsp_mt2ut(file->access_date); 464 inode->i_atime = hfsp_mt2ut(file->access_date);
477 inode->i_mtime = hfsp_mt2ut(file->content_mod_date); 465 inode->i_mtime = hfsp_mt2ut(file->content_mod_date);
478 inode->i_ctime = inode->i_mtime; 466 inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date);
467 HFSPLUS_I(inode).create_date = file->create_date;
479 } else { 468 } else {
480 printk("HFS+-fs: bad catalog entry used to create inode\n"); 469 printk(KERN_ERR "hfs: bad catalog entry used to create inode\n");
481 res = -EIO; 470 res = -EIO;
482 } 471 }
483 return res; 472 return res;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index e07aa096e07c..13cf848ac833 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -12,6 +12,7 @@
12 * hfsplus ioctls 12 * hfsplus ioctls
13 */ 13 */
14 14
15#include <linux/capability.h>
15#include <linux/fs.h> 16#include <linux/fs.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
17#include <linux/xattr.h> 18#include <linux/xattr.h>
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 935dafba0078..dc64fac00831 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -83,58 +83,58 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
83 switch (token) { 83 switch (token) {
84 case opt_creator: 84 case opt_creator:
85 if (match_fourchar(&args[0], &sbi->creator)) { 85 if (match_fourchar(&args[0], &sbi->creator)) {
86 printk("HFS+-fs: creator requires a 4 character value\n"); 86 printk(KERN_ERR "hfs: creator requires a 4 character value\n");
87 return 0; 87 return 0;
88 } 88 }
89 break; 89 break;
90 case opt_type: 90 case opt_type:
91 if (match_fourchar(&args[0], &sbi->type)) { 91 if (match_fourchar(&args[0], &sbi->type)) {
92 printk("HFS+-fs: type requires a 4 character value\n"); 92 printk(KERN_ERR "hfs: type requires a 4 character value\n");
93 return 0; 93 return 0;
94 } 94 }
95 break; 95 break;
96 case opt_umask: 96 case opt_umask:
97 if (match_octal(&args[0], &tmp)) { 97 if (match_octal(&args[0], &tmp)) {
98 printk("HFS+-fs: umask requires a value\n"); 98 printk(KERN_ERR "hfs: umask requires a value\n");
99 return 0; 99 return 0;
100 } 100 }
101 sbi->umask = (umode_t)tmp; 101 sbi->umask = (umode_t)tmp;
102 break; 102 break;
103 case opt_uid: 103 case opt_uid:
104 if (match_int(&args[0], &tmp)) { 104 if (match_int(&args[0], &tmp)) {
105 printk("HFS+-fs: uid requires an argument\n"); 105 printk(KERN_ERR "hfs: uid requires an argument\n");
106 return 0; 106 return 0;
107 } 107 }
108 sbi->uid = (uid_t)tmp; 108 sbi->uid = (uid_t)tmp;
109 break; 109 break;
110 case opt_gid: 110 case opt_gid:
111 if (match_int(&args[0], &tmp)) { 111 if (match_int(&args[0], &tmp)) {
112 printk("HFS+-fs: gid requires an argument\n"); 112 printk(KERN_ERR "hfs: gid requires an argument\n");
113 return 0; 113 return 0;
114 } 114 }
115 sbi->gid = (gid_t)tmp; 115 sbi->gid = (gid_t)tmp;
116 break; 116 break;
117 case opt_part: 117 case opt_part:
118 if (match_int(&args[0], &sbi->part)) { 118 if (match_int(&args[0], &sbi->part)) {
119 printk("HFS+-fs: part requires an argument\n"); 119 printk(KERN_ERR "hfs: part requires an argument\n");
120 return 0; 120 return 0;
121 } 121 }
122 break; 122 break;
123 case opt_session: 123 case opt_session:
124 if (match_int(&args[0], &sbi->session)) { 124 if (match_int(&args[0], &sbi->session)) {
125 printk("HFS+-fs: session requires an argument\n"); 125 printk(KERN_ERR "hfs: session requires an argument\n");
126 return 0; 126 return 0;
127 } 127 }
128 break; 128 break;
129 case opt_nls: 129 case opt_nls:
130 if (sbi->nls) { 130 if (sbi->nls) {
131 printk("HFS+-fs: unable to change nls mapping\n"); 131 printk(KERN_ERR "hfs: unable to change nls mapping\n");
132 return 0; 132 return 0;
133 } 133 }
134 p = match_strdup(&args[0]); 134 p = match_strdup(&args[0]);
135 sbi->nls = load_nls(p); 135 sbi->nls = load_nls(p);
136 if (!sbi->nls) { 136 if (!sbi->nls) {
137 printk("HFS+-fs: unable to load nls mapping \"%s\"\n", p); 137 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
138 kfree(p); 138 kfree(p);
139 return 0; 139 return 0;
140 } 140 }
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 6daaf7c755a6..7843f792a4b7 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -22,29 +22,12 @@ static void hfsplus_destroy_inode(struct inode *inode);
22 22
23#include "hfsplus_fs.h" 23#include "hfsplus_fs.h"
24 24
25void hfsplus_inode_check(struct super_block *sb)
26{
27#if 0
28 u32 cnt = atomic_read(&HFSPLUS_SB(sb).inode_cnt);
29 u32 last_cnt = HFSPLUS_SB(sb).last_inode_cnt;
30
31 if (cnt <= (last_cnt / 2) ||
32 cnt >= (last_cnt * 2)) {
33 HFSPLUS_SB(sb).last_inode_cnt = cnt;
34 printk("inode_check: %u,%u,%u\n", cnt, last_cnt,
35 HFSPLUS_SB(sb).cat_tree ? HFSPLUS_SB(sb).cat_tree->node_hash_cnt : 0);
36 }
37#endif
38}
39
40static void hfsplus_read_inode(struct inode *inode) 25static void hfsplus_read_inode(struct inode *inode)
41{ 26{
42 struct hfs_find_data fd; 27 struct hfs_find_data fd;
43 struct hfsplus_vh *vhdr; 28 struct hfsplus_vh *vhdr;
44 int err; 29 int err;
45 30
46 atomic_inc(&HFSPLUS_SB(inode->i_sb).inode_cnt);
47 hfsplus_inode_check(inode->i_sb);
48 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 31 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
49 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 32 init_MUTEX(&HFSPLUS_I(inode).extents_lock);
50 HFSPLUS_I(inode).flags = 0; 33 HFSPLUS_I(inode).flags = 0;
@@ -155,12 +138,10 @@ static int hfsplus_write_inode(struct inode *inode, int unused)
155static void hfsplus_clear_inode(struct inode *inode) 138static void hfsplus_clear_inode(struct inode *inode)
156{ 139{
157 dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino); 140 dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino);
158 atomic_dec(&HFSPLUS_SB(inode->i_sb).inode_cnt);
159 if (HFSPLUS_IS_RSRC(inode)) { 141 if (HFSPLUS_IS_RSRC(inode)) {
160 HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; 142 HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
161 iput(HFSPLUS_I(inode).rsrc_inode); 143 iput(HFSPLUS_I(inode).rsrc_inode);
162 } 144 }
163 hfsplus_inode_check(inode->i_sb);
164} 145}
165 146
166static void hfsplus_write_super(struct super_block *sb) 147static void hfsplus_write_super(struct super_block *sb)
@@ -188,7 +169,7 @@ static void hfsplus_write_super(struct super_block *sb)
188 block = HFSPLUS_SB(sb).blockoffset; 169 block = HFSPLUS_SB(sb).blockoffset;
189 block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9); 170 block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9);
190 offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1); 171 offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1);
191 printk("backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset, 172 printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset,
192 HFSPLUS_SB(sb).sect_count, block, offset); 173 HFSPLUS_SB(sb).sect_count, block, offset);
193 bh = sb_bread(sb, block); 174 bh = sb_bread(sb, block);
194 if (bh) { 175 if (bh) {
@@ -198,7 +179,7 @@ static void hfsplus_write_super(struct super_block *sb)
198 mark_buffer_dirty(bh); 179 mark_buffer_dirty(bh);
199 brelse(bh); 180 brelse(bh);
200 } else 181 } else
201 printk("backup not found!\n"); 182 printk(KERN_WARNING "hfs: backup not found!\n");
202 } 183 }
203 } 184 }
204 HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; 185 HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
@@ -259,18 +240,18 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
259 return -EINVAL; 240 return -EINVAL;
260 241
261 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { 242 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
262 printk("HFS+-fs warning: Filesystem was not cleanly unmounted, " 243 printk(KERN_WARNING "hfs: filesystem was not cleanly unmounted, "
263 "running fsck.hfsplus is recommended. leaving read-only.\n"); 244 "running fsck.hfsplus is recommended. leaving read-only.\n");
264 sb->s_flags |= MS_RDONLY; 245 sb->s_flags |= MS_RDONLY;
265 *flags |= MS_RDONLY; 246 *flags |= MS_RDONLY;
266 } else if (sbi.flags & HFSPLUS_SB_FORCE) { 247 } else if (sbi.flags & HFSPLUS_SB_FORCE) {
267 /* nothing */ 248 /* nothing */
268 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { 249 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
269 printk("HFS+-fs: Filesystem is marked locked, leaving read-only.\n"); 250 printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n");
270 sb->s_flags |= MS_RDONLY; 251 sb->s_flags |= MS_RDONLY;
271 *flags |= MS_RDONLY; 252 *flags |= MS_RDONLY;
272 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 253 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
273 printk("HFS+-fs: Filesystem is marked journaled, leaving read-only.\n"); 254 printk(KERN_WARNING "hfs: filesystem is marked journaled, leaving read-only.\n");
274 sb->s_flags |= MS_RDONLY; 255 sb->s_flags |= MS_RDONLY;
275 *flags |= MS_RDONLY; 256 *flags |= MS_RDONLY;
276 } 257 }
@@ -311,8 +292,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
311 INIT_HLIST_HEAD(&sbi->rsrc_inodes); 292 INIT_HLIST_HEAD(&sbi->rsrc_inodes);
312 hfsplus_fill_defaults(sbi); 293 hfsplus_fill_defaults(sbi);
313 if (!hfsplus_parse_options(data, sbi)) { 294 if (!hfsplus_parse_options(data, sbi)) {
314 if (!silent) 295 printk(KERN_ERR "hfs: unable to parse mount options\n");
315 printk("HFS+-fs: unable to parse mount options\n");
316 err = -EINVAL; 296 err = -EINVAL;
317 goto cleanup; 297 goto cleanup;
318 } 298 }
@@ -321,7 +301,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
321 nls = sbi->nls; 301 nls = sbi->nls;
322 sbi->nls = load_nls("utf8"); 302 sbi->nls = load_nls("utf8");
323 if (!sbi->nls) { 303 if (!sbi->nls) {
324 printk("HFS+: unable to load nls for utf8\n"); 304 printk(KERN_ERR "hfs: unable to load nls for utf8\n");
325 err = -EINVAL; 305 err = -EINVAL;
326 goto cleanup; 306 goto cleanup;
327 } 307 }
@@ -329,17 +309,17 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
329 /* Grab the volume header */ 309 /* Grab the volume header */
330 if (hfsplus_read_wrapper(sb)) { 310 if (hfsplus_read_wrapper(sb)) {
331 if (!silent) 311 if (!silent)
332 printk("HFS+-fs: unable to find HFS+ superblock\n"); 312 printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n");
333 err = -EINVAL; 313 err = -EINVAL;
334 goto cleanup; 314 goto cleanup;
335 } 315 }
336 vhdr = HFSPLUS_SB(sb).s_vhdr; 316 vhdr = HFSPLUS_SB(sb).s_vhdr;
337 317
338 /* Copy parts of the volume header into the superblock */ 318 /* Copy parts of the volume header into the superblock */
339 sb->s_magic = be16_to_cpu(vhdr->signature); 319 sb->s_magic = HFSPLUS_VOLHEAD_SIG;
340 if (be16_to_cpu(vhdr->version) != HFSPLUS_CURRENT_VERSION) { 320 if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION ||
341 if (!silent) 321 be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) {
342 printk("HFS+-fs: wrong filesystem version\n"); 322 printk(KERN_ERR "hfs: wrong filesystem version\n");
343 goto cleanup; 323 goto cleanup;
344 } 324 }
345 HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks); 325 HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks);
@@ -360,20 +340,17 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
360 sb->s_maxbytes = MAX_LFS_FILESIZE; 340 sb->s_maxbytes = MAX_LFS_FILESIZE;
361 341
362 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) { 342 if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
363 if (!silent) 343 printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, "
364 printk("HFS+-fs warning: Filesystem was not cleanly unmounted, " 344 "running fsck.hfsplus is recommended. mounting read-only.\n");
365 "running fsck.hfsplus is recommended. mounting read-only.\n");
366 sb->s_flags |= MS_RDONLY; 345 sb->s_flags |= MS_RDONLY;
367 } else if (sbi->flags & HFSPLUS_SB_FORCE) { 346 } else if (sbi->flags & HFSPLUS_SB_FORCE) {
368 /* nothing */ 347 /* nothing */
369 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { 348 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
370 if (!silent) 349 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
371 printk("HFS+-fs: Filesystem is marked locked, mounting read-only.\n");
372 sb->s_flags |= MS_RDONLY; 350 sb->s_flags |= MS_RDONLY;
373 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 351 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
374 if (!silent) 352 printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, "
375 printk("HFS+-fs: write access to a jounaled filesystem is not supported, " 353 "use the force option at your own risk, mounting read-only.\n");
376 "use the force option at your own risk, mounting read-only.\n");
377 sb->s_flags |= MS_RDONLY; 354 sb->s_flags |= MS_RDONLY;
378 } 355 }
379 sbi->flags &= ~HFSPLUS_SB_FORCE; 356 sbi->flags &= ~HFSPLUS_SB_FORCE;
@@ -381,21 +358,18 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
381 /* Load metadata objects (B*Trees) */ 358 /* Load metadata objects (B*Trees) */
382 HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); 359 HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
383 if (!HFSPLUS_SB(sb).ext_tree) { 360 if (!HFSPLUS_SB(sb).ext_tree) {
384 if (!silent) 361 printk(KERN_ERR "hfs: failed to load extents file\n");
385 printk("HFS+-fs: failed to load extents file\n");
386 goto cleanup; 362 goto cleanup;
387 } 363 }
388 HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); 364 HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
389 if (!HFSPLUS_SB(sb).cat_tree) { 365 if (!HFSPLUS_SB(sb).cat_tree) {
390 if (!silent) 366 printk(KERN_ERR "hfs: failed to load catalog file\n");
391 printk("HFS+-fs: failed to load catalog file\n");
392 goto cleanup; 367 goto cleanup;
393 } 368 }
394 369
395 HFSPLUS_SB(sb).alloc_file = iget(sb, HFSPLUS_ALLOC_CNID); 370 HFSPLUS_SB(sb).alloc_file = iget(sb, HFSPLUS_ALLOC_CNID);
396 if (!HFSPLUS_SB(sb).alloc_file) { 371 if (!HFSPLUS_SB(sb).alloc_file) {
397 if (!silent) 372 printk(KERN_ERR "hfs: failed to load allocation file\n");
398 printk("HFS+-fs: failed to load allocation file\n");
399 goto cleanup; 373 goto cleanup;
400 } 374 }
401 375
@@ -403,8 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
403 root = iget(sb, HFSPLUS_ROOT_CNID); 377 root = iget(sb, HFSPLUS_ROOT_CNID);
404 sb->s_root = d_alloc_root(root); 378 sb->s_root = d_alloc_root(root);
405 if (!sb->s_root) { 379 if (!sb->s_root) {
406 if (!silent) 380 printk(KERN_ERR "hfs: failed to load root directory\n");
407 printk("HFS+-fs: failed to load root directory\n");
408 iput(root); 381 iput(root);
409 goto cleanup; 382 goto cleanup;
410 } 383 }
@@ -438,7 +411,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
438 sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); 411 sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh);
439 412
440 if (!HFSPLUS_SB(sb).hidden_dir) { 413 if (!HFSPLUS_SB(sb).hidden_dir) {
441 printk("HFS+: create hidden dir...\n"); 414 printk(KERN_DEBUG "hfs: create hidden dir...\n");
442 HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR); 415 HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
443 hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode, 416 hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode,
444 &str, HFSPLUS_SB(sb).hidden_dir); 417 &str, HFSPLUS_SB(sb).hidden_dir);
@@ -518,7 +491,7 @@ static void __exit exit_hfsplus_fs(void)
518{ 491{
519 unregister_filesystem(&hfsplus_fs_type); 492 unregister_filesystem(&hfsplus_fs_type);
520 if (kmem_cache_destroy(hfsplus_inode_cachep)) 493 if (kmem_cache_destroy(hfsplus_inode_cachep))
521 printk(KERN_INFO "hfsplus_inode_cache: not all structures were freed\n"); 494 printk(KERN_ERR "hfsplus_inode_cache: not all structures were freed\n");
522} 495}
523 496
524module_init(init_hfsplus_fs) 497module_init(init_hfsplus_fs)
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 060c69048c3d..689c8bd721fb 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -28,7 +28,8 @@ static inline u16 case_fold(u16 c)
28} 28}
29 29
30/* Compare unicode strings, return values like normal strcmp */ 30/* Compare unicode strings, return values like normal strcmp */
31int hfsplus_unistrcmp(const struct hfsplus_unistr *s1, const struct hfsplus_unistr *s2) 31int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32 const struct hfsplus_unistr *s2)
32{ 33{
33 u16 len1, len2, c1, c2; 34 u16 len1, len2, c1, c2;
34 const hfsplus_unichr *p1, *p2; 35 const hfsplus_unichr *p1, *p2;
@@ -59,6 +60,33 @@ int hfsplus_unistrcmp(const struct hfsplus_unistr *s1, const struct hfsplus_unis
59 } 60 }
60} 61}
61 62
63/* Compare names as a sequence of 16-bit unsigned integers */
64int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65 const struct hfsplus_unistr *s2)
66{
67 u16 len1, len2, c1, c2;
68 const hfsplus_unichr *p1, *p2;
69 int len;
70
71 len1 = be16_to_cpu(s1->length);
72 len2 = be16_to_cpu(s2->length);
73 p1 = s1->unicode;
74 p2 = s2->unicode;
75
76 for (len = min(len1, len2); len > 0; len--) {
77 c1 = be16_to_cpu(*p1);
78 c2 = be16_to_cpu(*p2);
79 if (c1 != c2)
80 return c1 < c2 ? -1 : 1;
81 p1++;
82 p2++;
83 }
84
85 return len1 < len2 ? -1 :
86 len1 > len2 ? 1 : 0;
87}
88
89
62#define Hangul_SBase 0xac00 90#define Hangul_SBase 0xac00
63#define Hangul_LBase 0x1100 91#define Hangul_LBase 0x1100
64#define Hangul_VBase 0x1161 92#define Hangul_VBase 0x1161
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 95455e839231..72cab78f0509 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -28,8 +28,11 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
28{ 28{
29 u32 extent; 29 u32 extent;
30 u16 attrib; 30 u16 attrib;
31 __be16 sig;
31 32
32 if (be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_EMBEDSIG)) != HFSPLUS_VOLHEAD_SIG) 33 sig = *(__be16 *)(bufptr + HFSP_WRAPOFF_EMBEDSIG);
34 if (sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIG) &&
35 sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX))
33 return 0; 36 return 0;
34 37
35 attrib = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ATTRIB)); 38 attrib = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ATTRIB));
@@ -70,7 +73,7 @@ static int hfsplus_get_last_session(struct super_block *sb,
70 *start = (sector_t)te.cdte_addr.lba << 2; 73 *start = (sector_t)te.cdte_addr.lba << 2;
71 return 0; 74 return 0;
72 } 75 }
73 printk(KERN_ERR "HFS: Invalid session number or type of track\n"); 76 printk(KERN_ERR "hfs: invalid session number or type of track\n");
74 return -EINVAL; 77 return -EINVAL;
75 } 78 }
76 ms_info.addr_format = CDROM_LBA; 79 ms_info.addr_format = CDROM_LBA;
@@ -114,6 +117,10 @@ int hfsplus_read_wrapper(struct super_block *sb)
114 } 117 }
115 if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) 118 if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG))
116 break; 119 break;
120 if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) {
121 HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX;
122 break;
123 }
117 brelse(bh); 124 brelse(bh);
118 125
119 /* check for a partition block 126 /* check for a partition block
@@ -143,7 +150,7 @@ int hfsplus_read_wrapper(struct super_block *sb)
143 blocksize >>= 1; 150 blocksize >>= 1;
144 151
145 if (sb_set_blocksize(sb, blocksize) != blocksize) { 152 if (sb_set_blocksize(sb, blocksize) != blocksize) {
146 printk("HFS+: unable to blocksize to %u!\n", blocksize); 153 printk(KERN_ERR "hfs: unable to set blocksize to %u!\n", blocksize);
147 return -EINVAL; 154 return -EINVAL;
148 } 155 }
149 156
@@ -158,7 +165,9 @@ int hfsplus_read_wrapper(struct super_block *sb)
158 return -EIO; 165 return -EIO;
159 166
160 /* should still be the same... */ 167 /* should still be the same... */
161 if (be16_to_cpu(vhdr->signature) != HFSPLUS_VOLHEAD_SIG) 168 if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ?
169 cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) :
170 cpu_to_be16(HFSPLUS_VOLHEAD_SIG)))
162 goto error; 171 goto error;
163 HFSPLUS_SB(sb).s_vhbh = bh; 172 HFSPLUS_SB(sb).s_vhbh = bh;
164 HFSPLUS_SB(sb).s_vhdr = vhdr; 173 HFSPLUS_SB(sb).s_vhdr = vhdr;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 0217c3a04441..5591f9623aa2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -32,19 +32,19 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
32 32
33 /*printk("dir lseek\n");*/ 33 /*printk("dir lseek\n");*/
34 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; 34 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
35 down(&i->i_sem); 35 mutex_lock(&i->i_mutex);
36 pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1; 36 pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1;
37 while (pos != new_off) { 37 while (pos != new_off) {
38 if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); 38 if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh);
39 else goto fail; 39 else goto fail;
40 if (pos == 12) goto fail; 40 if (pos == 12) goto fail;
41 } 41 }
42 up(&i->i_sem); 42 mutex_unlock(&i->i_mutex);
43ok: 43ok:
44 unlock_kernel(); 44 unlock_kernel();
45 return filp->f_pos = new_off; 45 return filp->f_pos = new_off;
46fail: 46fail:
47 up(&i->i_sem); 47 mutex_unlock(&i->i_mutex);
48 /*printk("illegal lseek: %016llx\n", new_off);*/ 48 /*printk("illegal lseek: %016llx\n", new_off);*/
49 unlock_kernel(); 49 unlock_kernel();
50 return -ESPIPE; 50 return -ESPIPE;
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 52930915bad8..a44dc5897399 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -171,12 +171,12 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
171 171
172 err = -ENOMEM; 172 err = -ENOMEM;
173 parent = HPPFS_I(ino)->proc_dentry; 173 parent = HPPFS_I(ino)->proc_dentry;
174 down(&parent->d_inode->i_sem); 174 mutex_lock(&parent->d_inode->i_mutex);
175 proc_dentry = d_lookup(parent, &dentry->d_name); 175 proc_dentry = d_lookup(parent, &dentry->d_name);
176 if(proc_dentry == NULL){ 176 if(proc_dentry == NULL){
177 proc_dentry = d_alloc(parent, &dentry->d_name); 177 proc_dentry = d_alloc(parent, &dentry->d_name);
178 if(proc_dentry == NULL){ 178 if(proc_dentry == NULL){
179 up(&parent->d_inode->i_sem); 179 mutex_unlock(&parent->d_inode->i_mutex);
180 goto out; 180 goto out;
181 } 181 }
182 new = (*parent->d_inode->i_op->lookup)(parent->d_inode, 182 new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
@@ -186,7 +186,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
186 proc_dentry = new; 186 proc_dentry = new;
187 } 187 }
188 } 188 }
189 up(&parent->d_inode->i_sem); 189 mutex_unlock(&parent->d_inode->i_mutex);
190 190
191 if(IS_ERR(proc_dentry)) 191 if(IS_ERR(proc_dentry))
192 return(proc_dentry); 192 return(proc_dentry);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8c41315a6e42..f568102da1e8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -18,6 +18,7 @@
18#include <linux/highmem.h> 18#include <linux/highmem.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/string.h> 20#include <linux/string.h>
21#include <linux/capability.h>
21#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
22#include <linux/hugetlb.h> 23#include <linux/hugetlb.h>
23#include <linux/pagevec.h> 24#include <linux/pagevec.h>
@@ -118,7 +119,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
118 119
119 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 120 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
120 121
121 down(&inode->i_sem); 122 mutex_lock(&inode->i_mutex);
122 file_accessed(file); 123 file_accessed(file);
123 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 124 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
124 vma->vm_ops = &hugetlb_vm_ops; 125 vma->vm_ops = &hugetlb_vm_ops;
@@ -133,7 +134,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
133 if (inode->i_size < len) 134 if (inode->i_size < len)
134 inode->i_size = len; 135 inode->i_size = len;
135out: 136out:
136 up(&inode->i_sem); 137 mutex_unlock(&inode->i_mutex);
137 138
138 return ret; 139 return ret;
139} 140}
@@ -401,7 +402,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
401 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 402 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
402 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 403 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
403 info = HUGETLBFS_I(inode); 404 info = HUGETLBFS_I(inode);
404 mpol_shared_policy_init(&info->policy); 405 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
405 switch (mode & S_IFMT) { 406 switch (mode & S_IFMT) {
406 default: 407 default:
407 init_special_inode(inode, mode, dev); 408 init_special_inode(inode, mode, dev);
diff --git a/fs/inode.c b/fs/inode.c
index d8d04bd72b59..108138d4e909 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
22#include <linux/cdev.h> 22#include <linux/cdev.h>
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/inotify.h> 24#include <linux/inotify.h>
25#include <linux/mount.h>
25 26
26/* 27/*
27 * This is needed for the following functions: 28 * This is needed for the following functions:
@@ -192,7 +193,7 @@ void inode_init_once(struct inode *inode)
192 INIT_HLIST_NODE(&inode->i_hash); 193 INIT_HLIST_NODE(&inode->i_hash);
193 INIT_LIST_HEAD(&inode->i_dentry); 194 INIT_LIST_HEAD(&inode->i_dentry);
194 INIT_LIST_HEAD(&inode->i_devices); 195 INIT_LIST_HEAD(&inode->i_devices);
195 sema_init(&inode->i_sem, 1); 196 mutex_init(&inode->i_mutex);
196 init_rwsem(&inode->i_alloc_sem); 197 init_rwsem(&inode->i_alloc_sem);
197 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 198 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
198 rwlock_init(&inode->i_data.tree_lock); 199 rwlock_init(&inode->i_data.tree_lock);
@@ -770,7 +771,7 @@ EXPORT_SYMBOL(igrab);
770 * 771 *
771 * Note, @test is called with the inode_lock held, so can't sleep. 772 * Note, @test is called with the inode_lock held, so can't sleep.
772 */ 773 */
773static inline struct inode *ifind(struct super_block *sb, 774static struct inode *ifind(struct super_block *sb,
774 struct hlist_head *head, int (*test)(struct inode *, void *), 775 struct hlist_head *head, int (*test)(struct inode *, void *),
775 void *data, const int wait) 776 void *data, const int wait)
776{ 777{
@@ -804,7 +805,7 @@ static inline struct inode *ifind(struct super_block *sb,
804 * 805 *
805 * Otherwise NULL is returned. 806 * Otherwise NULL is returned.
806 */ 807 */
807static inline struct inode *ifind_fast(struct super_block *sb, 808static struct inode *ifind_fast(struct super_block *sb,
808 struct hlist_head *head, unsigned long ino) 809 struct hlist_head *head, unsigned long ino)
809{ 810{
810 struct inode *inode; 811 struct inode *inode;
@@ -1176,22 +1177,33 @@ sector_t bmap(struct inode * inode, sector_t block)
1176EXPORT_SYMBOL(bmap); 1177EXPORT_SYMBOL(bmap);
1177 1178
1178/** 1179/**
1179 * update_atime - update the access time 1180 * touch_atime - update the access time
1181 * @mnt: mount the inode is accessed on
1180 * @inode: inode accessed 1182 * @inode: inode accessed
1181 * 1183 *
1182 * Update the accessed time on an inode and mark it for writeback. 1184 * Update the accessed time on an inode and mark it for writeback.
1183 * This function automatically handles read only file systems and media, 1185 * This function automatically handles read only file systems and media,
1184 * as well as the "noatime" flag and inode specific "noatime" markers. 1186 * as well as the "noatime" flag and inode specific "noatime" markers.
1185 */ 1187 */
1186void update_atime(struct inode *inode) 1188void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1187{ 1189{
1190 struct inode *inode = dentry->d_inode;
1188 struct timespec now; 1191 struct timespec now;
1189 1192
1190 if (IS_NOATIME(inode)) 1193 if (IS_RDONLY(inode))
1191 return; 1194 return;
1192 if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode)) 1195
1196 if ((inode->i_flags & S_NOATIME) ||
1197 (inode->i_sb->s_flags & MS_NOATIME) ||
1198 ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
1193 return; 1199 return;
1194 if (IS_RDONLY(inode)) 1200
1201 /*
1202 * We may have a NULL vfsmount when coming from NFSD
1203 */
1204 if (mnt &&
1205 ((mnt->mnt_flags & MNT_NOATIME) ||
1206 ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))))
1195 return; 1207 return;
1196 1208
1197 now = current_fs_time(inode->i_sb); 1209 now = current_fs_time(inode->i_sb);
@@ -1201,19 +1213,23 @@ void update_atime(struct inode *inode)
1201 } 1213 }
1202} 1214}
1203 1215
1204EXPORT_SYMBOL(update_atime); 1216EXPORT_SYMBOL(touch_atime);
1205 1217
1206/** 1218/**
1207 * inode_update_time - update mtime and ctime time 1219 * file_update_time - update mtime and ctime time
1208 * @inode: inode accessed 1220 * @file: file accessed
1209 * @ctime_too: update ctime too
1210 * 1221 *
1211 * Update the mtime time on an inode and mark it for writeback. 1222 * Update the mtime and ctime members of an inode and mark the inode
1212 * When ctime_too is specified update the ctime too. 1223 * for writeback. Note that this function is meant exclusively for
1224 * usage in the file write path of filesystems, and filesystems may
1225 * choose to explicitly ignore update via this function with the
1226 * S_NOCTIME inode flag, e.g. for network filesystem where these
1227 * timestamps are handled by the server.
1213 */ 1228 */
1214 1229
1215void inode_update_time(struct inode *inode, int ctime_too) 1230void file_update_time(struct file *file)
1216{ 1231{
1232 struct inode *inode = file->f_dentry->d_inode;
1217 struct timespec now; 1233 struct timespec now;
1218 int sync_it = 0; 1234 int sync_it = 0;
1219 1235
@@ -1227,16 +1243,15 @@ void inode_update_time(struct inode *inode, int ctime_too)
1227 sync_it = 1; 1243 sync_it = 1;
1228 inode->i_mtime = now; 1244 inode->i_mtime = now;
1229 1245
1230 if (ctime_too) { 1246 if (!timespec_equal(&inode->i_ctime, &now))
1231 if (!timespec_equal(&inode->i_ctime, &now)) 1247 sync_it = 1;
1232 sync_it = 1; 1248 inode->i_ctime = now;
1233 inode->i_ctime = now; 1249
1234 }
1235 if (sync_it) 1250 if (sync_it)
1236 mark_inode_dirty_sync(inode); 1251 mark_inode_dirty_sync(inode);
1237} 1252}
1238 1253
1239EXPORT_SYMBOL(inode_update_time); 1254EXPORT_SYMBOL(file_update_time);
1240 1255
1241int inode_needs_sync(struct inode *inode) 1256int inode_needs_sync(struct inode *inode)
1242{ 1257{
diff --git a/fs/inotify.c b/fs/inotify.c
index 2fecb7af4a77..878ccca61213 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -33,6 +33,7 @@
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/writeback.h> 34#include <linux/writeback.h>
35#include <linux/inotify.h> 35#include <linux/inotify.h>
36#include <linux/syscalls.h>
36 37
37#include <asm/ioctls.h> 38#include <asm/ioctls.h>
38 39
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 569209181425..f8aeec3ca10c 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -8,6 +8,7 @@
8#include <linux/syscalls.h> 8#include <linux/syscalls.h>
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/smp_lock.h> 10#include <linux/smp_lock.h>
11#include <linux/capability.h>
11#include <linux/file.h> 12#include <linux/file.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13#include <linux/security.h> 14#include <linux/security.h>
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 4bf1c6365a19..ca77008146c0 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -22,6 +22,7 @@
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include <linux/ioprio.h> 23#include <linux/ioprio.h>
24#include <linux/blkdev.h> 24#include <linux/blkdev.h>
25#include <linux/capability.h>
25#include <linux/syscalls.h> 26#include <linux/syscalls.h>
26 27
27static int set_task_ioprio(struct task_struct *task, int ioprio) 28static int set_task_ioprio(struct task_struct *task, int ioprio)
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index e37e82b7cbf0..e7ba0c30e071 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -185,8 +185,5 @@ struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct n
185 } 185 }
186 } 186 }
187 unlock_kernel(); 187 unlock_kernel();
188 if (inode) 188 return d_splice_alias(inode, dentry);
189 return d_splice_alias(inode, dentry);
190 d_add(dentry, inode);
191 return NULL;
192} 189}
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index cb3cef525c3b..e6265a0b56b8 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -338,7 +338,7 @@ restart:
338 * done (maybe it's a new transaction, but it fell at the same 338 * done (maybe it's a new transaction, but it fell at the same
339 * address). 339 * address).
340 */ 340 */
341 if (journal->j_checkpoint_transactions == transaction || 341 if (journal->j_checkpoint_transactions == transaction &&
342 transaction->t_tid == this_tid) { 342 transaction->t_tid == this_tid) {
343 int batch_count = 0; 343 int batch_count = 0;
344 struct buffer_head *bhs[NR_BATCH]; 344 struct buffer_head *bhs[NR_BATCH];
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 002ad2bbc769..29e62d98bae6 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,7 +829,8 @@ restart_loop:
829 journal->j_committing_transaction = NULL; 829 journal->j_committing_transaction = NULL;
830 spin_unlock(&journal->j_state_lock); 830 spin_unlock(&journal->j_state_lock);
831 831
832 if (commit_transaction->t_checkpoint_list == NULL) { 832 if (commit_transaction->t_checkpoint_list == NULL &&
833 commit_transaction->t_checkpoint_io_list == NULL) {
833 __journal_drop_transaction(journal, commit_transaction); 834 __journal_drop_transaction(journal, commit_transaction);
834 } else { 835 } else {
835 if (journal->j_checkpoint_transactions == NULL) { 836 if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 3dcc6d2162cb..fc3855a1aef3 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -757,7 +757,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
757 757
758 read_len = 0; 758 read_len = 0;
759 result = 0; 759 result = 0;
760 offset = page->index << PAGE_CACHE_SHIFT; 760 offset = page_offset(page);
761 761
762 kmap(page); 762 kmap(page);
763 buf = page_address(page); 763 buf = page_address(page);
@@ -1415,7 +1415,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
1415 * This will never trigger with sane page sizes. leave it in 1415 * This will never trigger with sane page sizes. leave it in
1416 * anyway, since I'm thinking about how to merge larger writes 1416 * anyway, since I'm thinking about how to merge larger writes
1417 * (the current idea is to poke a thread that does the actual 1417 * (the current idea is to poke a thread that does the actual
1418 * I/O and starts by doing a down(&inode->i_sem). then we 1418 * I/O and starts by doing a mutex_lock(&inode->i_mutex). then we
1419 * would need to get the page cache pages and have a list of 1419 * would need to get the page cache pages and have a list of
1420 * I/O requests and do write-merging here. 1420 * I/O requests and do write-merging here.
1421 * -- prumpf 1421 * -- prumpf
@@ -1545,7 +1545,7 @@ jffs_commit_write(struct file *filp, struct page *page,
1545{ 1545{
1546 void *addr = page_address(page) + from; 1546 void *addr = page_address(page) + from;
1547 /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */ 1547 /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */
1548 loff_t pos = (page->index<<PAGE_CACHE_SHIFT) + from; 1548 loff_t pos = page_offset(page) + from;
1549 1549
1550 return jffs_file_write(filp, addr, to-from, &pos); 1550 return jffs_file_write(filp, addr, to-from, &pos);
1551} /* jffs_commit_write() */ 1551} /* jffs_commit_write() */
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index fff108bb118b..70f7a896c04a 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -47,7 +47,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
47 ic = next_inode(&i, ic, (c))) 47 ic = next_inode(&i, ic, (c)))
48 48
49 49
50static inline void jffs2_build_inode_pass1(struct jffs2_sb_info *c, 50static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
51 struct jffs2_inode_cache *ic) 51 struct jffs2_inode_cache *ic)
52{ 52{
53 struct jffs2_full_dirent *fd; 53 struct jffs2_full_dirent *fd;
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d0fcc5f3497e..09e5d10b8840 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -11,6 +11,7 @@
11 * 11 *
12 */ 12 */
13 13
14#include <linux/capability.h>
14#include <linux/config.h> 15#include <linux/config.h>
15#include <linux/kernel.h> 16#include <linux/kernel.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index c79eebb8ab32..b635e167a3fa 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -134,7 +134,7 @@ static void jffs2_fragtree_insert(struct jffs2_node_frag *newfrag, struct jffs2_
134/* 134/*
135 * Allocate and initializes a new fragment. 135 * Allocate and initializes a new fragment.
136 */ 136 */
137static inline struct jffs2_node_frag * new_fragment(struct jffs2_full_dnode *fn, uint32_t ofs, uint32_t size) 137static struct jffs2_node_frag * new_fragment(struct jffs2_full_dnode *fn, uint32_t ofs, uint32_t size)
138{ 138{
139 struct jffs2_node_frag *newfrag; 139 struct jffs2_node_frag *newfrag;
140 140
@@ -513,7 +513,7 @@ free_out:
513 * 513 *
514 * Checks the node if we are in the checking stage. 514 * Checks the node if we are in the checking stage.
515 */ 515 */
516static inline int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn) 516static int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn)
517{ 517{
518 int ret; 518 int ret;
519 519
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 68000a50ceb6..2967b7393415 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -302,8 +302,7 @@ int dbSync(struct inode *ipbmap)
302 /* 302 /*
303 * write out dirty pages of bmap 303 * write out dirty pages of bmap
304 */ 304 */
305 filemap_fdatawrite(ipbmap->i_mapping); 305 filemap_write_and_wait(ipbmap->i_mapping);
306 filemap_fdatawait(ipbmap->i_mapping);
307 306
308 diWriteSpecial(ipbmap, 0); 307 diWriteSpecial(ipbmap, 0);
309 308
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 28201b194f53..31b4aa13dd4b 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -265,8 +265,7 @@ int diSync(struct inode *ipimap)
265 /* 265 /*
266 * write out dirty pages of imap 266 * write out dirty pages of imap
267 */ 267 */
268 filemap_fdatawrite(ipimap->i_mapping); 268 filemap_write_and_wait(ipimap->i_mapping);
269 filemap_fdatawait(ipimap->i_mapping);
270 269
271 diWriteSpecial(ipimap, 0); 270 diWriteSpecial(ipimap, 0);
272 271
@@ -565,8 +564,7 @@ void diFreeSpecial(struct inode *ip)
565 jfs_err("diFreeSpecial called with NULL ip!"); 564 jfs_err("diFreeSpecial called with NULL ip!");
566 return; 565 return;
567 } 566 }
568 filemap_fdatawrite(ip->i_mapping); 567 filemap_write_and_wait(ip->i_mapping);
569 filemap_fdatawait(ip->i_mapping);
570 truncate_inode_pages(ip->i_mapping, 0); 568 truncate_inode_pages(ip->i_mapping, 0);
571 iput(ip); 569 iput(ip);
572} 570}
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index c0fd7b3eadc6..dc21a5bd54d4 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -58,7 +58,7 @@ struct jfs_inode_info {
58 /* 58 /*
59 * rdwrlock serializes xtree between reads & writes and synchronizes 59 * rdwrlock serializes xtree between reads & writes and synchronizes
60 * changes to special inodes. It's use would be redundant on 60 * changes to special inodes. It's use would be redundant on
61 * directories since the i_sem taken in the VFS is sufficient. 61 * directories since the i_mutex taken in the VFS is sufficient.
62 */ 62 */
63 struct rw_semaphore rdwrlock; 63 struct rw_semaphore rdwrlock;
64 /* 64 /*
@@ -68,7 +68,7 @@ struct jfs_inode_info {
68 * inode is blocked in txBegin or TxBeginAnon 68 * inode is blocked in txBegin or TxBeginAnon
69 */ 69 */
70 struct semaphore commit_sem; 70 struct semaphore commit_sem;
71 /* xattr_sem allows us to access the xattrs without taking i_sem */ 71 /* xattr_sem allows us to access the xattrs without taking i_mutex */
72 struct rw_semaphore xattr_sem; 72 struct rw_semaphore xattr_sem;
73 lid_t xtlid; /* lid of xtree lock on directory */ 73 lid_t xtlid; /* lid of xtree lock on directory */
74#ifdef CONFIG_JFS_POSIX_ACL 74#ifdef CONFIG_JFS_POSIX_ACL
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index b660c93c92de..2ddb6b892bcf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1231,10 +1231,8 @@ int txCommit(tid_t tid, /* transaction identifier */
1231 * when we don't need to worry about it at all. 1231 * when we don't need to worry about it at all.
1232 * 1232 *
1233 * if ((!S_ISDIR(ip->i_mode)) 1233 * if ((!S_ISDIR(ip->i_mode))
1234 * && (tblk->flag & COMMIT_DELETE) == 0) { 1234 * && (tblk->flag & COMMIT_DELETE) == 0)
1235 * filemap_fdatawrite(ip->i_mapping); 1235 * filemap_write_and_wait(ip->i_mapping);
1236 * filemap_fdatawait(ip->i_mapping);
1237 * }
1238 */ 1236 */
1239 1237
1240 /* 1238 /*
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index 5cf91785b541..21eaf7ac0fcb 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -108,8 +108,7 @@ int jfs_umount(struct super_block *sb)
108 * Make sure all metadata makes it to disk before we mark 108 * Make sure all metadata makes it to disk before we mark
109 * the superblock as clean 109 * the superblock as clean
110 */ 110 */
111 filemap_fdatawrite(sbi->direct_inode->i_mapping); 111 filemap_write_and_wait(sbi->direct_inode->i_mapping);
112 filemap_fdatawait(sbi->direct_inode->i_mapping);
113 112
114 /* 113 /*
115 * ensure all file system file pages are propagated to their 114 * ensure all file system file pages are propagated to their
@@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb)
161 * mark the superblock clean before everything is flushed to 160 * mark the superblock clean before everything is flushed to
162 * disk. 161 * disk.
163 */ 162 */
164 filemap_fdatawrite(sbi->direct_inode->i_mapping); 163 filemap_write_and_wait(sbi->direct_inode->i_mapping);
165 filemap_fdatawait(sbi->direct_inode->i_mapping);
166 164
167 updateSuper(sb, FM_CLEAN); 165 updateSuper(sb, FM_CLEAN);
168 166
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index c6dc254d3253..45180361871c 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -376,8 +376,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
376 * by txCommit(); 376 * by txCommit();
377 */ 377 */
378 filemap_fdatawait(ipbmap->i_mapping); 378 filemap_fdatawait(ipbmap->i_mapping);
379 filemap_fdatawrite(ipbmap->i_mapping); 379 filemap_write_and_wait(ipbmap->i_mapping);
380 filemap_fdatawait(ipbmap->i_mapping);
381 diWriteSpecial(ipbmap, 0); 380 diWriteSpecial(ipbmap, 0);
382 381
383 newPage = nPages; /* first new page number */ 382 newPage = nPages; /* first new page number */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4226af3ea91b..8d31f1336431 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -502,8 +502,7 @@ out_no_rw:
502 jfs_err("jfs_umount failed with return code %d", rc); 502 jfs_err("jfs_umount failed with return code %d", rc);
503 } 503 }
504out_mount_failed: 504out_mount_failed:
505 filemap_fdatawrite(sbi->direct_inode->i_mapping); 505 filemap_write_and_wait(sbi->direct_inode->i_mapping);
506 filemap_fdatawait(sbi->direct_inode->i_mapping);
507 truncate_inode_pages(sbi->direct_inode->i_mapping, 0); 506 truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
508 make_bad_inode(sbi->direct_inode); 507 make_bad_inode(sbi->direct_inode);
509 iput(sbi->direct_inode); 508 iput(sbi->direct_inode);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 23aa5066b5a4..f23048f9471f 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -17,6 +17,7 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#include <linux/capability.h>
20#include <linux/fs.h> 21#include <linux/fs.h>
21#include <linux/xattr.h> 22#include <linux/xattr.h>
22#include <linux/posix_acl_xattr.h> 23#include <linux/posix_acl_xattr.h>
@@ -83,21 +84,6 @@ struct ea_buffer {
83#define EA_NEW 0x0004 84#define EA_NEW 0x0004
84#define EA_MALLOC 0x0008 85#define EA_MALLOC 0x0008
85 86
86/* Namespaces */
87#define XATTR_SYSTEM_PREFIX "system."
88#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1)
89
90#define XATTR_USER_PREFIX "user."
91#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)
92
93#define XATTR_OS2_PREFIX "os2."
94#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)
95
96/* XATTR_SECURITY_PREFIX is defined in include/linux/xattr.h */
97#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1)
98
99#define XATTR_TRUSTED_PREFIX "trusted."
100#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1)
101 87
102/* 88/*
103 * These three routines are used to recognize on-disk extended attributes 89 * These three routines are used to recognize on-disk extended attributes
@@ -773,36 +759,23 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
773static int can_set_xattr(struct inode *inode, const char *name, 759static int can_set_xattr(struct inode *inode, const char *name,
774 const void *value, size_t value_len) 760 const void *value, size_t value_len)
775{ 761{
776 if (IS_RDONLY(inode)) 762 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
777 return -EROFS;
778
779 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
780 return -EPERM;
781
782 if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
783 /*
784 * "system.*"
785 */
786 return can_set_system_xattr(inode, name, value, value_len); 763 return can_set_system_xattr(inode, name, value, value_len);
787 764
788 if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) 765 /*
789 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); 766 * Don't allow setting an attribute in an unknown namespace.
790 767 */
791#ifdef CONFIG_JFS_SECURITY 768 if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
792 if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) 769 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
793 == 0) 770 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
794 return 0; /* Leave it to the security module */ 771 strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
795#endif
796
797 if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) &&
798 (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) != 0))
799 return -EOPNOTSUPP; 772 return -EOPNOTSUPP;
800 773
801 if (!S_ISREG(inode->i_mode) && 774 if (!S_ISREG(inode->i_mode) &&
802 (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX)) 775 (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
803 return -EPERM; 776 return -EPERM;
804 777
805 return permission(inode, MAY_WRITE, NULL); 778 return 0;
806} 779}
807 780
808int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, 781int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
@@ -972,22 +945,6 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
972 return rc; 945 return rc;
973} 946}
974 947
975static int can_get_xattr(struct inode *inode, const char *name)
976{
977#ifdef CONFIG_JFS_SECURITY
978 if(strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0)
979 return 0;
980#endif
981
982 if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
983 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
984
985 if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
986 return 0;
987
988 return permission(inode, MAY_READ, NULL);
989}
990
991ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, 948ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
992 size_t buf_size) 949 size_t buf_size)
993{ 950{
@@ -998,12 +955,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
998 ssize_t size; 955 ssize_t size;
999 int namelen = strlen(name); 956 int namelen = strlen(name);
1000 char *os2name = NULL; 957 char *os2name = NULL;
1001 int rc;
1002 char *value; 958 char *value;
1003 959
1004 if ((rc = can_get_xattr(inode, name)))
1005 return rc;
1006
1007 if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { 960 if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
1008 os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1, 961 os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
1009 GFP_KERNEL); 962 GFP_KERNEL);
diff --git a/fs/libfs.c b/fs/libfs.c
index 58101dff2c66..63c020e6589e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -74,7 +74,7 @@ int dcache_dir_close(struct inode *inode, struct file *file)
74 74
75loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) 75loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
76{ 76{
77 down(&file->f_dentry->d_inode->i_sem); 77 mutex_lock(&file->f_dentry->d_inode->i_mutex);
78 switch (origin) { 78 switch (origin) {
79 case 1: 79 case 1:
80 offset += file->f_pos; 80 offset += file->f_pos;
@@ -82,7 +82,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
82 if (offset >= 0) 82 if (offset >= 0)
83 break; 83 break;
84 default: 84 default:
85 up(&file->f_dentry->d_inode->i_sem); 85 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
86 return -EINVAL; 86 return -EINVAL;
87 } 87 }
88 if (offset != file->f_pos) { 88 if (offset != file->f_pos) {
@@ -93,20 +93,20 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
93 loff_t n = file->f_pos - 2; 93 loff_t n = file->f_pos - 2;
94 94
95 spin_lock(&dcache_lock); 95 spin_lock(&dcache_lock);
96 list_del(&cursor->d_child); 96 list_del(&cursor->d_u.d_child);
97 p = file->f_dentry->d_subdirs.next; 97 p = file->f_dentry->d_subdirs.next;
98 while (n && p != &file->f_dentry->d_subdirs) { 98 while (n && p != &file->f_dentry->d_subdirs) {
99 struct dentry *next; 99 struct dentry *next;
100 next = list_entry(p, struct dentry, d_child); 100 next = list_entry(p, struct dentry, d_u.d_child);
101 if (!d_unhashed(next) && next->d_inode) 101 if (!d_unhashed(next) && next->d_inode)
102 n--; 102 n--;
103 p = p->next; 103 p = p->next;
104 } 104 }
105 list_add_tail(&cursor->d_child, p); 105 list_add_tail(&cursor->d_u.d_child, p);
106 spin_unlock(&dcache_lock); 106 spin_unlock(&dcache_lock);
107 } 107 }
108 } 108 }
109 up(&file->f_dentry->d_inode->i_sem); 109 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
110 return offset; 110 return offset;
111} 111}
112 112
@@ -126,7 +126,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
126{ 126{
127 struct dentry *dentry = filp->f_dentry; 127 struct dentry *dentry = filp->f_dentry;
128 struct dentry *cursor = filp->private_data; 128 struct dentry *cursor = filp->private_data;
129 struct list_head *p, *q = &cursor->d_child; 129 struct list_head *p, *q = &cursor->d_u.d_child;
130 ino_t ino; 130 ino_t ino;
131 int i = filp->f_pos; 131 int i = filp->f_pos;
132 132
@@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
153 } 153 }
154 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 154 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
155 struct dentry *next; 155 struct dentry *next;
156 next = list_entry(p, struct dentry, d_child); 156 next = list_entry(p, struct dentry, d_u.d_child);
157 if (d_unhashed(next) || !next->d_inode) 157 if (d_unhashed(next) || !next->d_inode)
158 continue; 158 continue;
159 159
@@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry)
261 int ret = 0; 261 int ret = 0;
262 262
263 spin_lock(&dcache_lock); 263 spin_lock(&dcache_lock);
264 list_for_each_entry(child, &dentry->d_subdirs, d_child) 264 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
265 if (simple_positive(child)) 265 if (simple_positive(child))
266 goto out; 266 goto out;
267 ret = 1; 267 ret = 1;
@@ -356,7 +356,7 @@ int simple_commit_write(struct file *file, struct page *page,
356 356
357 /* 357 /*
358 * No need to use i_size_read() here, the i_size 358 * No need to use i_size_read() here, the i_size
359 * cannot change under us because we hold the i_sem. 359 * cannot change under us because we hold the i_mutex.
360 */ 360 */
361 if (pos > inode->i_size) 361 if (pos > inode->i_size)
362 i_size_write(inode, pos); 362 i_size_write(inode, pos);
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index f01e9c0d2677..200fbda2c6d1 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -44,7 +44,7 @@ loff_t_to_s32(loff_t offset)
44/* 44/*
45 * XDR functions for basic NLM types 45 * XDR functions for basic NLM types
46 */ 46 */
47static inline u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c) 47static u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c)
48{ 48{
49 unsigned int len; 49 unsigned int len;
50 50
@@ -79,7 +79,7 @@ nlm_encode_cookie(u32 *p, struct nlm_cookie *c)
79 return p; 79 return p;
80} 80}
81 81
82static inline u32 * 82static u32 *
83nlm_decode_fh(u32 *p, struct nfs_fh *f) 83nlm_decode_fh(u32 *p, struct nfs_fh *f)
84{ 84{
85 unsigned int len; 85 unsigned int len;
@@ -119,7 +119,7 @@ nlm_encode_oh(u32 *p, struct xdr_netobj *oh)
119 return xdr_encode_netobj(p, oh); 119 return xdr_encode_netobj(p, oh);
120} 120}
121 121
122static inline u32 * 122static u32 *
123nlm_decode_lock(u32 *p, struct nlm_lock *lock) 123nlm_decode_lock(u32 *p, struct nlm_lock *lock)
124{ 124{
125 struct file_lock *fl = &lock->fl; 125 struct file_lock *fl = &lock->fl;
diff --git a/fs/locks.c b/fs/locks.c
index fb32d6218e21..909eab8fb1d0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -154,7 +154,7 @@ static struct file_lock *locks_alloc_lock(void)
154} 154}
155 155
156/* Free a lock which is not in use. */ 156/* Free a lock which is not in use. */
157static inline void locks_free_lock(struct file_lock *fl) 157static void locks_free_lock(struct file_lock *fl)
158{ 158{
159 if (fl == NULL) { 159 if (fl == NULL) {
160 BUG(); 160 BUG();
@@ -475,8 +475,7 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
475/* 475/*
476 * Check whether two locks have the same owner. 476 * Check whether two locks have the same owner.
477 */ 477 */
478static inline int 478static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
479posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
480{ 479{
481 if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner) 480 if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
482 return fl2->fl_lmops == fl1->fl_lmops && 481 return fl2->fl_lmops == fl1->fl_lmops &&
@@ -487,7 +486,7 @@ posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
487/* Remove waiter from blocker's block list. 486/* Remove waiter from blocker's block list.
488 * When blocker ends up pointing to itself then the list is empty. 487 * When blocker ends up pointing to itself then the list is empty.
489 */ 488 */
490static inline void __locks_delete_block(struct file_lock *waiter) 489static void __locks_delete_block(struct file_lock *waiter)
491{ 490{
492 list_del_init(&waiter->fl_block); 491 list_del_init(&waiter->fl_block);
493 list_del_init(&waiter->fl_link); 492 list_del_init(&waiter->fl_link);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 0f1e4530670f..f5bbe4c97c58 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -126,7 +126,7 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
126} 126}
127 127
128 128
129static inline void 129static void
130__mb_cache_entry_unhash(struct mb_cache_entry *ce) 130__mb_cache_entry_unhash(struct mb_cache_entry *ce)
131{ 131{
132 int n; 132 int n;
@@ -139,7 +139,7 @@ __mb_cache_entry_unhash(struct mb_cache_entry *ce)
139} 139}
140 140
141 141
142static inline void 142static void
143__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) 143__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
144{ 144{
145 struct mb_cache *cache = ce->e_cache; 145 struct mb_cache *cache = ce->e_cache;
@@ -158,7 +158,7 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
158} 158}
159 159
160 160
161static inline void 161static void
162__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) 162__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
163{ 163{
164 /* Wake up all processes queuing for this cache entry. */ 164 /* Wake up all processes queuing for this cache entry. */
diff --git a/fs/mpage.c b/fs/mpage.c
index f1d2d02bd4c8..e431cb3878d6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -184,7 +184,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
184 if (page_has_buffers(page)) 184 if (page_has_buffers(page))
185 goto confused; 185 goto confused;
186 186
187 block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); 187 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
188 last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; 188 last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
189 189
190 bh.b_page = page; 190 bh.b_page = page;
@@ -466,7 +466,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
466 * The page has no buffers: map it to disk 466 * The page has no buffers: map it to disk
467 */ 467 */
468 BUG_ON(!PageUptodate(page)); 468 BUG_ON(!PageUptodate(page));
469 block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); 469 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
470 last_block = (i_size - 1) >> blkbits; 470 last_block = (i_size - 1) >> blkbits;
471 map_bh.b_page = page; 471 map_bh.b_page = page;
472 for (page_block = 0; page_block < blocks_per_page; ) { 472 for (page_block = 0; page_block < blocks_per_page; ) {
diff --git a/fs/namei.c b/fs/namei.c
index 6dbbd42d8b95..4acdac043b6b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -28,7 +28,10 @@
28#include <linux/syscalls.h> 28#include <linux/syscalls.h>
29#include <linux/mount.h> 29#include <linux/mount.h>
30#include <linux/audit.h> 30#include <linux/audit.h>
31#include <linux/capability.h>
31#include <linux/file.h> 32#include <linux/file.h>
33#include <linux/fcntl.h>
34#include <linux/namei.h>
32#include <asm/namei.h> 35#include <asm/namei.h>
33#include <asm/uaccess.h> 36#include <asm/uaccess.h>
34 37
@@ -112,7 +115,7 @@
112 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 115 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
113 * PATH_MAX includes the nul terminator --RR. 116 * PATH_MAX includes the nul terminator --RR.
114 */ 117 */
115static inline int do_getname(const char __user *filename, char *page) 118static int do_getname(const char __user *filename, char *page)
116{ 119{
117 int retval; 120 int retval;
118 unsigned long len = PATH_MAX; 121 unsigned long len = PATH_MAX;
@@ -395,7 +398,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
395 * short-cut DAC fails, then call permission() to do more 398 * short-cut DAC fails, then call permission() to do more
396 * complete permission check. 399 * complete permission check.
397 */ 400 */
398static inline int exec_permission_lite(struct inode *inode, 401static int exec_permission_lite(struct inode *inode,
399 struct nameidata *nd) 402 struct nameidata *nd)
400{ 403{
401 umode_t mode = inode->i_mode; 404 umode_t mode = inode->i_mode;
@@ -438,7 +441,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
438 struct dentry * result; 441 struct dentry * result;
439 struct inode *dir = parent->d_inode; 442 struct inode *dir = parent->d_inode;
440 443
441 down(&dir->i_sem); 444 mutex_lock(&dir->i_mutex);
442 /* 445 /*
443 * First re-do the cached lookup just in case it was created 446 * First re-do the cached lookup just in case it was created
444 * while we waited for the directory semaphore.. 447 * while we waited for the directory semaphore..
@@ -464,7 +467,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
464 else 467 else
465 result = dentry; 468 result = dentry;
466 } 469 }
467 up(&dir->i_sem); 470 mutex_unlock(&dir->i_mutex);
468 return result; 471 return result;
469 } 472 }
470 473
@@ -472,7 +475,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
472 * Uhhuh! Nasty case: the cache was re-populated while 475 * Uhhuh! Nasty case: the cache was re-populated while
473 * we waited on the semaphore. Need to revalidate. 476 * we waited on the semaphore. Need to revalidate.
474 */ 477 */
475 up(&dir->i_sem); 478 mutex_unlock(&dir->i_mutex);
476 if (result->d_op && result->d_op->d_revalidate) { 479 if (result->d_op && result->d_op->d_revalidate) {
477 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { 480 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
478 dput(result); 481 dput(result);
@@ -485,7 +488,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
485static int __emul_lookup_dentry(const char *, struct nameidata *); 488static int __emul_lookup_dentry(const char *, struct nameidata *);
486 489
487/* SMP-safe */ 490/* SMP-safe */
488static inline int 491static __always_inline int
489walk_init_root(const char *name, struct nameidata *nd) 492walk_init_root(const char *name, struct nameidata *nd)
490{ 493{
491 read_lock(&current->fs->lock); 494 read_lock(&current->fs->lock);
@@ -503,7 +506,7 @@ walk_init_root(const char *name, struct nameidata *nd)
503 return 1; 506 return 1;
504} 507}
505 508
506static inline int __vfs_follow_link(struct nameidata *nd, const char *link) 509static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
507{ 510{
508 int res = 0; 511 int res = 0;
509 char *name; 512 char *name;
@@ -543,7 +546,7 @@ struct path {
543 struct dentry *dentry; 546 struct dentry *dentry;
544}; 547};
545 548
546static inline int __do_follow_link(struct path *path, struct nameidata *nd) 549static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
547{ 550{
548 int error; 551 int error;
549 void *cookie; 552 void *cookie;
@@ -689,7 +692,7 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
689 return 0; 692 return 0;
690} 693}
691 694
692static inline void follow_dotdot(struct nameidata *nd) 695static __always_inline void follow_dotdot(struct nameidata *nd)
693{ 696{
694 while(1) { 697 while(1) {
695 struct vfsmount *parent; 698 struct vfsmount *parent;
@@ -1062,7 +1065,8 @@ set_it:
1062} 1065}
1063 1066
1064/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1067/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1065int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) 1068static int fastcall do_path_lookup(int dfd, const char *name,
1069 unsigned int flags, struct nameidata *nd)
1066{ 1070{
1067 int retval = 0; 1071 int retval = 0;
1068 1072
@@ -1082,9 +1086,38 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata
1082 } 1086 }
1083 nd->mnt = mntget(current->fs->rootmnt); 1087 nd->mnt = mntget(current->fs->rootmnt);
1084 nd->dentry = dget(current->fs->root); 1088 nd->dentry = dget(current->fs->root);
1085 } else { 1089 } else if (dfd == AT_FDCWD) {
1086 nd->mnt = mntget(current->fs->pwdmnt); 1090 nd->mnt = mntget(current->fs->pwdmnt);
1087 nd->dentry = dget(current->fs->pwd); 1091 nd->dentry = dget(current->fs->pwd);
1092 } else {
1093 struct file *file;
1094 int fput_needed;
1095 struct dentry *dentry;
1096
1097 file = fget_light(dfd, &fput_needed);
1098 if (!file) {
1099 retval = -EBADF;
1100 goto out_fail;
1101 }
1102
1103 dentry = file->f_dentry;
1104
1105 if (!S_ISDIR(dentry->d_inode->i_mode)) {
1106 retval = -ENOTDIR;
1107 fput_light(file, fput_needed);
1108 goto out_fail;
1109 }
1110
1111 retval = file_permission(file, MAY_EXEC);
1112 if (retval) {
1113 fput_light(file, fput_needed);
1114 goto out_fail;
1115 }
1116
1117 nd->mnt = mntget(file->f_vfsmnt);
1118 nd->dentry = dget(dentry);
1119
1120 fput_light(file, fput_needed);
1088 } 1121 }
1089 read_unlock(&current->fs->lock); 1122 read_unlock(&current->fs->lock);
1090 current->total_link_count = 0; 1123 current->total_link_count = 0;
@@ -1093,11 +1126,19 @@ out:
1093 if (unlikely(current->audit_context 1126 if (unlikely(current->audit_context
1094 && nd && nd->dentry && nd->dentry->d_inode)) 1127 && nd && nd->dentry && nd->dentry->d_inode))
1095 audit_inode(name, nd->dentry->d_inode, flags); 1128 audit_inode(name, nd->dentry->d_inode, flags);
1129out_fail:
1096 return retval; 1130 return retval;
1097} 1131}
1098 1132
1099static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, 1133int fastcall path_lookup(const char *name, unsigned int flags,
1100 struct nameidata *nd, int open_flags, int create_mode) 1134 struct nameidata *nd)
1135{
1136 return do_path_lookup(AT_FDCWD, name, flags, nd);
1137}
1138
1139static int __path_lookup_intent_open(int dfd, const char *name,
1140 unsigned int lookup_flags, struct nameidata *nd,
1141 int open_flags, int create_mode)
1101{ 1142{
1102 struct file *filp = get_empty_filp(); 1143 struct file *filp = get_empty_filp();
1103 int err; 1144 int err;
@@ -1107,7 +1148,7 @@ static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags
1107 nd->intent.open.file = filp; 1148 nd->intent.open.file = filp;
1108 nd->intent.open.flags = open_flags; 1149 nd->intent.open.flags = open_flags;
1109 nd->intent.open.create_mode = create_mode; 1150 nd->intent.open.create_mode = create_mode;
1110 err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); 1151 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1111 if (IS_ERR(nd->intent.open.file)) { 1152 if (IS_ERR(nd->intent.open.file)) {
1112 if (err == 0) { 1153 if (err == 0) {
1113 err = PTR_ERR(nd->intent.open.file); 1154 err = PTR_ERR(nd->intent.open.file);
@@ -1125,10 +1166,10 @@ static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags
1125 * @nd: pointer to nameidata 1166 * @nd: pointer to nameidata
1126 * @open_flags: open intent flags 1167 * @open_flags: open intent flags
1127 */ 1168 */
1128int path_lookup_open(const char *name, unsigned int lookup_flags, 1169int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
1129 struct nameidata *nd, int open_flags) 1170 struct nameidata *nd, int open_flags)
1130{ 1171{
1131 return __path_lookup_intent_open(name, lookup_flags, nd, 1172 return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
1132 open_flags, 0); 1173 open_flags, 0);
1133} 1174}
1134 1175
@@ -1140,12 +1181,12 @@ int path_lookup_open(const char *name, unsigned int lookup_flags,
1140 * @open_flags: open intent flags 1181 * @open_flags: open intent flags
1141 * @create_mode: create intent flags 1182 * @create_mode: create intent flags
1142 */ 1183 */
1143static int path_lookup_create(const char *name, unsigned int lookup_flags, 1184static int path_lookup_create(int dfd, const char *name,
1144 struct nameidata *nd, int open_flags, 1185 unsigned int lookup_flags, struct nameidata *nd,
1145 int create_mode) 1186 int open_flags, int create_mode)
1146{ 1187{
1147 return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, 1188 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
1148 open_flags, create_mode); 1189 nd, open_flags, create_mode);
1149} 1190}
1150 1191
1151int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, 1192int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
@@ -1155,7 +1196,7 @@ int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
1155 int err = PTR_ERR(tmp); 1196 int err = PTR_ERR(tmp);
1156 1197
1157 if (!IS_ERR(tmp)) { 1198 if (!IS_ERR(tmp)) {
1158 err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); 1199 err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
1159 putname(tmp); 1200 putname(tmp);
1160 } 1201 }
1161 return err; 1202 return err;
@@ -1247,18 +1288,24 @@ access:
1247 * that namei follows links, while lnamei does not. 1288 * that namei follows links, while lnamei does not.
1248 * SMP-safe 1289 * SMP-safe
1249 */ 1290 */
1250int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1291int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags,
1292 struct nameidata *nd)
1251{ 1293{
1252 char *tmp = getname(name); 1294 char *tmp = getname(name);
1253 int err = PTR_ERR(tmp); 1295 int err = PTR_ERR(tmp);
1254 1296
1255 if (!IS_ERR(tmp)) { 1297 if (!IS_ERR(tmp)) {
1256 err = path_lookup(tmp, flags, nd); 1298 err = do_path_lookup(dfd, tmp, flags, nd);
1257 putname(tmp); 1299 putname(tmp);
1258 } 1300 }
1259 return err; 1301 return err;
1260} 1302}
1261 1303
1304int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
1305{
1306 return __user_walk_fd(AT_FDCWD, name, flags, nd);
1307}
1308
1262/* 1309/*
1263 * It's inline, so penalty for filesystems that don't use sticky bit is 1310 * It's inline, so penalty for filesystems that don't use sticky bit is
1264 * minimal. 1311 * minimal.
@@ -1293,7 +1340,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
1293 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1340 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
1294 * nfs_async_unlink(). 1341 * nfs_async_unlink().
1295 */ 1342 */
1296static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1343static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1297{ 1344{
1298 int error; 1345 int error;
1299 1346
@@ -1366,7 +1413,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1366 struct dentry *p; 1413 struct dentry *p;
1367 1414
1368 if (p1 == p2) { 1415 if (p1 == p2) {
1369 down(&p1->d_inode->i_sem); 1416 mutex_lock(&p1->d_inode->i_mutex);
1370 return NULL; 1417 return NULL;
1371 } 1418 }
1372 1419
@@ -1374,30 +1421,30 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1374 1421
1375 for (p = p1; p->d_parent != p; p = p->d_parent) { 1422 for (p = p1; p->d_parent != p; p = p->d_parent) {
1376 if (p->d_parent == p2) { 1423 if (p->d_parent == p2) {
1377 down(&p2->d_inode->i_sem); 1424 mutex_lock(&p2->d_inode->i_mutex);
1378 down(&p1->d_inode->i_sem); 1425 mutex_lock(&p1->d_inode->i_mutex);
1379 return p; 1426 return p;
1380 } 1427 }
1381 } 1428 }
1382 1429
1383 for (p = p2; p->d_parent != p; p = p->d_parent) { 1430 for (p = p2; p->d_parent != p; p = p->d_parent) {
1384 if (p->d_parent == p1) { 1431 if (p->d_parent == p1) {
1385 down(&p1->d_inode->i_sem); 1432 mutex_lock(&p1->d_inode->i_mutex);
1386 down(&p2->d_inode->i_sem); 1433 mutex_lock(&p2->d_inode->i_mutex);
1387 return p; 1434 return p;
1388 } 1435 }
1389 } 1436 }
1390 1437
1391 down(&p1->d_inode->i_sem); 1438 mutex_lock(&p1->d_inode->i_mutex);
1392 down(&p2->d_inode->i_sem); 1439 mutex_lock(&p2->d_inode->i_mutex);
1393 return NULL; 1440 return NULL;
1394} 1441}
1395 1442
1396void unlock_rename(struct dentry *p1, struct dentry *p2) 1443void unlock_rename(struct dentry *p1, struct dentry *p2)
1397{ 1444{
1398 up(&p1->d_inode->i_sem); 1445 mutex_unlock(&p1->d_inode->i_mutex);
1399 if (p1 != p2) { 1446 if (p1 != p2) {
1400 up(&p2->d_inode->i_sem); 1447 mutex_unlock(&p2->d_inode->i_mutex);
1401 up(&p1->d_inode->i_sb->s_vfs_rename_sem); 1448 up(&p1->d_inode->i_sb->s_vfs_rename_sem);
1402 } 1449 }
1403} 1450}
@@ -1491,7 +1538,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1491 if (!error) { 1538 if (!error) {
1492 DQUOT_INIT(inode); 1539 DQUOT_INIT(inode);
1493 1540
1494 error = do_truncate(dentry, 0, NULL); 1541 error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
1495 } 1542 }
1496 put_write_access(inode); 1543 put_write_access(inode);
1497 if (error) 1544 if (error)
@@ -1517,7 +1564,8 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1517 * for symlinks (where the permissions are checked later). 1564 * for symlinks (where the permissions are checked later).
1518 * SMP-safe 1565 * SMP-safe
1519 */ 1566 */
1520int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) 1567int open_namei(int dfd, const char *pathname, int flag,
1568 int mode, struct nameidata *nd)
1521{ 1569{
1522 int acc_mode, error; 1570 int acc_mode, error;
1523 struct path path; 1571 struct path path;
@@ -1539,7 +1587,8 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1539 * The simplest case - just a plain lookup. 1587 * The simplest case - just a plain lookup.
1540 */ 1588 */
1541 if (!(flag & O_CREAT)) { 1589 if (!(flag & O_CREAT)) {
1542 error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); 1590 error = path_lookup_open(dfd, pathname, lookup_flags(flag),
1591 nd, flag);
1543 if (error) 1592 if (error)
1544 return error; 1593 return error;
1545 goto ok; 1594 goto ok;
@@ -1548,7 +1597,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1548 /* 1597 /*
1549 * Create - we need to know the parent. 1598 * Create - we need to know the parent.
1550 */ 1599 */
1551 error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); 1600 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
1552 if (error) 1601 if (error)
1553 return error; 1602 return error;
1554 1603
@@ -1563,14 +1612,14 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1563 1612
1564 dir = nd->dentry; 1613 dir = nd->dentry;
1565 nd->flags &= ~LOOKUP_PARENT; 1614 nd->flags &= ~LOOKUP_PARENT;
1566 down(&dir->d_inode->i_sem); 1615 mutex_lock(&dir->d_inode->i_mutex);
1567 path.dentry = lookup_hash(nd); 1616 path.dentry = lookup_hash(nd);
1568 path.mnt = nd->mnt; 1617 path.mnt = nd->mnt;
1569 1618
1570do_last: 1619do_last:
1571 error = PTR_ERR(path.dentry); 1620 error = PTR_ERR(path.dentry);
1572 if (IS_ERR(path.dentry)) { 1621 if (IS_ERR(path.dentry)) {
1573 up(&dir->d_inode->i_sem); 1622 mutex_unlock(&dir->d_inode->i_mutex);
1574 goto exit; 1623 goto exit;
1575 } 1624 }
1576 1625
@@ -1579,7 +1628,7 @@ do_last:
1579 if (!IS_POSIXACL(dir->d_inode)) 1628 if (!IS_POSIXACL(dir->d_inode))
1580 mode &= ~current->fs->umask; 1629 mode &= ~current->fs->umask;
1581 error = vfs_create(dir->d_inode, path.dentry, mode, nd); 1630 error = vfs_create(dir->d_inode, path.dentry, mode, nd);
1582 up(&dir->d_inode->i_sem); 1631 mutex_unlock(&dir->d_inode->i_mutex);
1583 dput(nd->dentry); 1632 dput(nd->dentry);
1584 nd->dentry = path.dentry; 1633 nd->dentry = path.dentry;
1585 if (error) 1634 if (error)
@@ -1593,7 +1642,7 @@ do_last:
1593 /* 1642 /*
1594 * It already exists. 1643 * It already exists.
1595 */ 1644 */
1596 up(&dir->d_inode->i_sem); 1645 mutex_unlock(&dir->d_inode->i_mutex);
1597 1646
1598 error = -EEXIST; 1647 error = -EEXIST;
1599 if (flag & O_EXCL) 1648 if (flag & O_EXCL)
@@ -1665,7 +1714,7 @@ do_link:
1665 goto exit; 1714 goto exit;
1666 } 1715 }
1667 dir = nd->dentry; 1716 dir = nd->dentry;
1668 down(&dir->d_inode->i_sem); 1717 mutex_lock(&dir->d_inode->i_mutex);
1669 path.dentry = lookup_hash(nd); 1718 path.dentry = lookup_hash(nd);
1670 path.mnt = nd->mnt; 1719 path.mnt = nd->mnt;
1671 __putname(nd->last.name); 1720 __putname(nd->last.name);
@@ -1680,13 +1729,13 @@ do_link:
1680 * Simple function to lookup and return a dentry and create it 1729 * Simple function to lookup and return a dentry and create it
1681 * if it doesn't exist. Is SMP-safe. 1730 * if it doesn't exist. Is SMP-safe.
1682 * 1731 *
1683 * Returns with nd->dentry->d_inode->i_sem locked. 1732 * Returns with nd->dentry->d_inode->i_mutex locked.
1684 */ 1733 */
1685struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1734struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1686{ 1735{
1687 struct dentry *dentry = ERR_PTR(-EEXIST); 1736 struct dentry *dentry = ERR_PTR(-EEXIST);
1688 1737
1689 down(&nd->dentry->d_inode->i_sem); 1738 mutex_lock(&nd->dentry->d_inode->i_mutex);
1690 /* 1739 /*
1691 * Yucky last component or no last component at all? 1740 * Yucky last component or no last component at all?
1692 * (foo/., foo/.., /////) 1741 * (foo/., foo/.., /////)
@@ -1743,7 +1792,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1743 return error; 1792 return error;
1744} 1793}
1745 1794
1746asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) 1795asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1796 unsigned dev)
1747{ 1797{
1748 int error = 0; 1798 int error = 0;
1749 char * tmp; 1799 char * tmp;
@@ -1756,7 +1806,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
1756 if (IS_ERR(tmp)) 1806 if (IS_ERR(tmp))
1757 return PTR_ERR(tmp); 1807 return PTR_ERR(tmp);
1758 1808
1759 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1809 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
1760 if (error) 1810 if (error)
1761 goto out; 1811 goto out;
1762 dentry = lookup_create(&nd, 0); 1812 dentry = lookup_create(&nd, 0);
@@ -1784,7 +1834,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev)
1784 } 1834 }
1785 dput(dentry); 1835 dput(dentry);
1786 } 1836 }
1787 up(&nd.dentry->d_inode->i_sem); 1837 mutex_unlock(&nd.dentry->d_inode->i_mutex);
1788 path_release(&nd); 1838 path_release(&nd);
1789out: 1839out:
1790 putname(tmp); 1840 putname(tmp);
@@ -1792,6 +1842,11 @@ out:
1792 return error; 1842 return error;
1793} 1843}
1794 1844
1845asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
1846{
1847 return sys_mknodat(AT_FDCWD, filename, mode, dev);
1848}
1849
1795int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1850int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1796{ 1851{
1797 int error = may_create(dir, dentry, NULL); 1852 int error = may_create(dir, dentry, NULL);
@@ -1814,7 +1869,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1814 return error; 1869 return error;
1815} 1870}
1816 1871
1817asmlinkage long sys_mkdir(const char __user * pathname, int mode) 1872asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
1818{ 1873{
1819 int error = 0; 1874 int error = 0;
1820 char * tmp; 1875 char * tmp;
@@ -1825,7 +1880,7 @@ asmlinkage long sys_mkdir(const char __user * pathname, int mode)
1825 struct dentry *dentry; 1880 struct dentry *dentry;
1826 struct nameidata nd; 1881 struct nameidata nd;
1827 1882
1828 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1883 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
1829 if (error) 1884 if (error)
1830 goto out; 1885 goto out;
1831 dentry = lookup_create(&nd, 1); 1886 dentry = lookup_create(&nd, 1);
@@ -1836,7 +1891,7 @@ asmlinkage long sys_mkdir(const char __user * pathname, int mode)
1836 error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); 1891 error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
1837 dput(dentry); 1892 dput(dentry);
1838 } 1893 }
1839 up(&nd.dentry->d_inode->i_sem); 1894 mutex_unlock(&nd.dentry->d_inode->i_mutex);
1840 path_release(&nd); 1895 path_release(&nd);
1841out: 1896out:
1842 putname(tmp); 1897 putname(tmp);
@@ -1845,6 +1900,11 @@ out:
1845 return error; 1900 return error;
1846} 1901}
1847 1902
1903asmlinkage long sys_mkdir(const char __user *pathname, int mode)
1904{
1905 return sys_mkdirat(AT_FDCWD, pathname, mode);
1906}
1907
1848/* 1908/*
1849 * We try to drop the dentry early: we should have 1909 * We try to drop the dentry early: we should have
1850 * a usage count of 2 if we're the only user of this 1910 * a usage count of 2 if we're the only user of this
@@ -1885,7 +1945,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1885 1945
1886 DQUOT_INIT(dir); 1946 DQUOT_INIT(dir);
1887 1947
1888 down(&dentry->d_inode->i_sem); 1948 mutex_lock(&dentry->d_inode->i_mutex);
1889 dentry_unhash(dentry); 1949 dentry_unhash(dentry);
1890 if (d_mountpoint(dentry)) 1950 if (d_mountpoint(dentry))
1891 error = -EBUSY; 1951 error = -EBUSY;
@@ -1897,7 +1957,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1897 dentry->d_inode->i_flags |= S_DEAD; 1957 dentry->d_inode->i_flags |= S_DEAD;
1898 } 1958 }
1899 } 1959 }
1900 up(&dentry->d_inode->i_sem); 1960 mutex_unlock(&dentry->d_inode->i_mutex);
1901 if (!error) { 1961 if (!error) {
1902 d_delete(dentry); 1962 d_delete(dentry);
1903 } 1963 }
@@ -1906,7 +1966,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1906 return error; 1966 return error;
1907} 1967}
1908 1968
1909asmlinkage long sys_rmdir(const char __user * pathname) 1969static long do_rmdir(int dfd, const char __user *pathname)
1910{ 1970{
1911 int error = 0; 1971 int error = 0;
1912 char * name; 1972 char * name;
@@ -1917,7 +1977,7 @@ asmlinkage long sys_rmdir(const char __user * pathname)
1917 if(IS_ERR(name)) 1977 if(IS_ERR(name))
1918 return PTR_ERR(name); 1978 return PTR_ERR(name);
1919 1979
1920 error = path_lookup(name, LOOKUP_PARENT, &nd); 1980 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
1921 if (error) 1981 if (error)
1922 goto exit; 1982 goto exit;
1923 1983
@@ -1932,14 +1992,14 @@ asmlinkage long sys_rmdir(const char __user * pathname)
1932 error = -EBUSY; 1992 error = -EBUSY;
1933 goto exit1; 1993 goto exit1;
1934 } 1994 }
1935 down(&nd.dentry->d_inode->i_sem); 1995 mutex_lock(&nd.dentry->d_inode->i_mutex);
1936 dentry = lookup_hash(&nd); 1996 dentry = lookup_hash(&nd);
1937 error = PTR_ERR(dentry); 1997 error = PTR_ERR(dentry);
1938 if (!IS_ERR(dentry)) { 1998 if (!IS_ERR(dentry)) {
1939 error = vfs_rmdir(nd.dentry->d_inode, dentry); 1999 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1940 dput(dentry); 2000 dput(dentry);
1941 } 2001 }
1942 up(&nd.dentry->d_inode->i_sem); 2002 mutex_unlock(&nd.dentry->d_inode->i_mutex);
1943exit1: 2003exit1:
1944 path_release(&nd); 2004 path_release(&nd);
1945exit: 2005exit:
@@ -1947,6 +2007,11 @@ exit:
1947 return error; 2007 return error;
1948} 2008}
1949 2009
2010asmlinkage long sys_rmdir(const char __user *pathname)
2011{
2012 return do_rmdir(AT_FDCWD, pathname);
2013}
2014
1950int vfs_unlink(struct inode *dir, struct dentry *dentry) 2015int vfs_unlink(struct inode *dir, struct dentry *dentry)
1951{ 2016{
1952 int error = may_delete(dir, dentry, 0); 2017 int error = may_delete(dir, dentry, 0);
@@ -1959,7 +2024,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
1959 2024
1960 DQUOT_INIT(dir); 2025 DQUOT_INIT(dir);
1961 2026
1962 down(&dentry->d_inode->i_sem); 2027 mutex_lock(&dentry->d_inode->i_mutex);
1963 if (d_mountpoint(dentry)) 2028 if (d_mountpoint(dentry))
1964 error = -EBUSY; 2029 error = -EBUSY;
1965 else { 2030 else {
@@ -1967,7 +2032,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
1967 if (!error) 2032 if (!error)
1968 error = dir->i_op->unlink(dir, dentry); 2033 error = dir->i_op->unlink(dir, dentry);
1969 } 2034 }
1970 up(&dentry->d_inode->i_sem); 2035 mutex_unlock(&dentry->d_inode->i_mutex);
1971 2036
1972 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 2037 /* We don't d_delete() NFS sillyrenamed files--they still exist. */
1973 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 2038 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -1979,11 +2044,11 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
1979 2044
1980/* 2045/*
1981 * Make sure that the actual truncation of the file will occur outside its 2046 * Make sure that the actual truncation of the file will occur outside its
1982 * directory's i_sem. Truncate can take a long time if there is a lot of 2047 * directory's i_mutex. Truncate can take a long time if there is a lot of
1983 * writeout happening, and we don't want to prevent access to the directory 2048 * writeout happening, and we don't want to prevent access to the directory
1984 * while waiting on the I/O. 2049 * while waiting on the I/O.
1985 */ 2050 */
1986asmlinkage long sys_unlink(const char __user * pathname) 2051static long do_unlinkat(int dfd, const char __user *pathname)
1987{ 2052{
1988 int error = 0; 2053 int error = 0;
1989 char * name; 2054 char * name;
@@ -1995,13 +2060,13 @@ asmlinkage long sys_unlink(const char __user * pathname)
1995 if(IS_ERR(name)) 2060 if(IS_ERR(name))
1996 return PTR_ERR(name); 2061 return PTR_ERR(name);
1997 2062
1998 error = path_lookup(name, LOOKUP_PARENT, &nd); 2063 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
1999 if (error) 2064 if (error)
2000 goto exit; 2065 goto exit;
2001 error = -EISDIR; 2066 error = -EISDIR;
2002 if (nd.last_type != LAST_NORM) 2067 if (nd.last_type != LAST_NORM)
2003 goto exit1; 2068 goto exit1;
2004 down(&nd.dentry->d_inode->i_sem); 2069 mutex_lock(&nd.dentry->d_inode->i_mutex);
2005 dentry = lookup_hash(&nd); 2070 dentry = lookup_hash(&nd);
2006 error = PTR_ERR(dentry); 2071 error = PTR_ERR(dentry);
2007 if (!IS_ERR(dentry)) { 2072 if (!IS_ERR(dentry)) {
@@ -2015,7 +2080,7 @@ asmlinkage long sys_unlink(const char __user * pathname)
2015 exit2: 2080 exit2:
2016 dput(dentry); 2081 dput(dentry);
2017 } 2082 }
2018 up(&nd.dentry->d_inode->i_sem); 2083 mutex_unlock(&nd.dentry->d_inode->i_mutex);
2019 if (inode) 2084 if (inode)
2020 iput(inode); /* truncate the inode here */ 2085 iput(inode); /* truncate the inode here */
2021exit1: 2086exit1:
@@ -2030,6 +2095,22 @@ slashes:
2030 goto exit2; 2095 goto exit2;
2031} 2096}
2032 2097
2098asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag)
2099{
2100 if ((flag & ~AT_REMOVEDIR) != 0)
2101 return -EINVAL;
2102
2103 if (flag & AT_REMOVEDIR)
2104 return do_rmdir(dfd, pathname);
2105
2106 return do_unlinkat(dfd, pathname);
2107}
2108
2109asmlinkage long sys_unlink(const char __user *pathname)
2110{
2111 return do_unlinkat(AT_FDCWD, pathname);
2112}
2113
2033int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 2114int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
2034{ 2115{
2035 int error = may_create(dir, dentry, NULL); 2116 int error = may_create(dir, dentry, NULL);
@@ -2051,7 +2132,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
2051 return error; 2132 return error;
2052} 2133}
2053 2134
2054asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname) 2135asmlinkage long sys_symlinkat(const char __user *oldname,
2136 int newdfd, const char __user *newname)
2055{ 2137{
2056 int error = 0; 2138 int error = 0;
2057 char * from; 2139 char * from;
@@ -2066,7 +2148,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new
2066 struct dentry *dentry; 2148 struct dentry *dentry;
2067 struct nameidata nd; 2149 struct nameidata nd;
2068 2150
2069 error = path_lookup(to, LOOKUP_PARENT, &nd); 2151 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
2070 if (error) 2152 if (error)
2071 goto out; 2153 goto out;
2072 dentry = lookup_create(&nd, 0); 2154 dentry = lookup_create(&nd, 0);
@@ -2075,7 +2157,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new
2075 error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); 2157 error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
2076 dput(dentry); 2158 dput(dentry);
2077 } 2159 }
2078 up(&nd.dentry->d_inode->i_sem); 2160 mutex_unlock(&nd.dentry->d_inode->i_mutex);
2079 path_release(&nd); 2161 path_release(&nd);
2080out: 2162out:
2081 putname(to); 2163 putname(to);
@@ -2084,6 +2166,11 @@ out:
2084 return error; 2166 return error;
2085} 2167}
2086 2168
2169asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname)
2170{
2171 return sys_symlinkat(oldname, AT_FDCWD, newname);
2172}
2173
2087int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 2174int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
2088{ 2175{
2089 struct inode *inode = old_dentry->d_inode; 2176 struct inode *inode = old_dentry->d_inode;
@@ -2113,10 +2200,10 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2113 if (error) 2200 if (error)
2114 return error; 2201 return error;
2115 2202
2116 down(&old_dentry->d_inode->i_sem); 2203 mutex_lock(&old_dentry->d_inode->i_mutex);
2117 DQUOT_INIT(dir); 2204 DQUOT_INIT(dir);
2118 error = dir->i_op->link(old_dentry, dir, new_dentry); 2205 error = dir->i_op->link(old_dentry, dir, new_dentry);
2119 up(&old_dentry->d_inode->i_sem); 2206 mutex_unlock(&old_dentry->d_inode->i_mutex);
2120 if (!error) 2207 if (!error)
2121 fsnotify_create(dir, new_dentry->d_name.name); 2208 fsnotify_create(dir, new_dentry->d_name.name);
2122 return error; 2209 return error;
@@ -2131,7 +2218,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2131 * with linux 2.0, and to avoid hard-linking to directories 2218 * with linux 2.0, and to avoid hard-linking to directories
2132 * and other special files. --ADM 2219 * and other special files. --ADM
2133 */ 2220 */
2134asmlinkage long sys_link(const char __user * oldname, const char __user * newname) 2221asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2222 int newdfd, const char __user *newname)
2135{ 2223{
2136 struct dentry *new_dentry; 2224 struct dentry *new_dentry;
2137 struct nameidata nd, old_nd; 2225 struct nameidata nd, old_nd;
@@ -2142,10 +2230,10 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam
2142 if (IS_ERR(to)) 2230 if (IS_ERR(to))
2143 return PTR_ERR(to); 2231 return PTR_ERR(to);
2144 2232
2145 error = __user_walk(oldname, 0, &old_nd); 2233 error = __user_walk_fd(olddfd, oldname, 0, &old_nd);
2146 if (error) 2234 if (error)
2147 goto exit; 2235 goto exit;
2148 error = path_lookup(to, LOOKUP_PARENT, &nd); 2236 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
2149 if (error) 2237 if (error)
2150 goto out; 2238 goto out;
2151 error = -EXDEV; 2239 error = -EXDEV;
@@ -2157,7 +2245,7 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam
2157 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); 2245 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
2158 dput(new_dentry); 2246 dput(new_dentry);
2159 } 2247 }
2160 up(&nd.dentry->d_inode->i_sem); 2248 mutex_unlock(&nd.dentry->d_inode->i_mutex);
2161out_release: 2249out_release:
2162 path_release(&nd); 2250 path_release(&nd);
2163out: 2251out:
@@ -2168,6 +2256,11 @@ exit:
2168 return error; 2256 return error;
2169} 2257}
2170 2258
2259asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
2260{
2261 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname);
2262}
2263
2171/* 2264/*
2172 * The worst of all namespace operations - renaming directory. "Perverted" 2265 * The worst of all namespace operations - renaming directory. "Perverted"
2173 * doesn't even start to describe it. Somebody in UCB had a heck of a trip... 2266 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
@@ -2178,7 +2271,7 @@ exit:
2178 * sb->s_vfs_rename_sem. We might be more accurate, but that's another 2271 * sb->s_vfs_rename_sem. We might be more accurate, but that's another
2179 * story. 2272 * story.
2180 * c) we have to lock _three_ objects - parents and victim (if it exists). 2273 * c) we have to lock _three_ objects - parents and victim (if it exists).
2181 * And that - after we got ->i_sem on parents (until then we don't know 2274 * And that - after we got ->i_mutex on parents (until then we don't know
2182 * whether the target exists). Solution: try to be smart with locking 2275 * whether the target exists). Solution: try to be smart with locking
2183 * order for inodes. We rely on the fact that tree topology may change 2276 * order for inodes. We rely on the fact that tree topology may change
2184 * only under ->s_vfs_rename_sem _and_ that parent of the object we 2277 * only under ->s_vfs_rename_sem _and_ that parent of the object we
@@ -2195,9 +2288,9 @@ exit:
2195 * stuff into VFS), but the former is not going away. Solution: the same 2288 * stuff into VFS), but the former is not going away. Solution: the same
2196 * trick as in rmdir(). 2289 * trick as in rmdir().
2197 * e) conversion from fhandle to dentry may come in the wrong moment - when 2290 * e) conversion from fhandle to dentry may come in the wrong moment - when
2198 * we are removing the target. Solution: we will have to grab ->i_sem 2291 * we are removing the target. Solution: we will have to grab ->i_mutex
2199 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2292 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
2200 * ->i_sem on parents, which works but leads to some truely excessive 2293 * ->i_mutex on parents, which works but leads to some truely excessive
2201 * locking]. 2294 * locking].
2202 */ 2295 */
2203static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2296static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
@@ -2222,7 +2315,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2222 2315
2223 target = new_dentry->d_inode; 2316 target = new_dentry->d_inode;
2224 if (target) { 2317 if (target) {
2225 down(&target->i_sem); 2318 mutex_lock(&target->i_mutex);
2226 dentry_unhash(new_dentry); 2319 dentry_unhash(new_dentry);
2227 } 2320 }
2228 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2321 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
@@ -2232,7 +2325,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2232 if (target) { 2325 if (target) {
2233 if (!error) 2326 if (!error)
2234 target->i_flags |= S_DEAD; 2327 target->i_flags |= S_DEAD;
2235 up(&target->i_sem); 2328 mutex_unlock(&target->i_mutex);
2236 if (d_unhashed(new_dentry)) 2329 if (d_unhashed(new_dentry))
2237 d_rehash(new_dentry); 2330 d_rehash(new_dentry);
2238 dput(new_dentry); 2331 dput(new_dentry);
@@ -2255,7 +2348,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2255 dget(new_dentry); 2348 dget(new_dentry);
2256 target = new_dentry->d_inode; 2349 target = new_dentry->d_inode;
2257 if (target) 2350 if (target)
2258 down(&target->i_sem); 2351 mutex_lock(&target->i_mutex);
2259 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2352 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2260 error = -EBUSY; 2353 error = -EBUSY;
2261 else 2354 else
@@ -2266,7 +2359,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2266 d_move(old_dentry, new_dentry); 2359 d_move(old_dentry, new_dentry);
2267 } 2360 }
2268 if (target) 2361 if (target)
2269 up(&target->i_sem); 2362 mutex_unlock(&target->i_mutex);
2270 dput(new_dentry); 2363 dput(new_dentry);
2271 return error; 2364 return error;
2272} 2365}
@@ -2314,7 +2407,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2314 return error; 2407 return error;
2315} 2408}
2316 2409
2317static inline int do_rename(const char * oldname, const char * newname) 2410static int do_rename(int olddfd, const char *oldname,
2411 int newdfd, const char *newname)
2318{ 2412{
2319 int error = 0; 2413 int error = 0;
2320 struct dentry * old_dir, * new_dir; 2414 struct dentry * old_dir, * new_dir;
@@ -2322,11 +2416,11 @@ static inline int do_rename(const char * oldname, const char * newname)
2322 struct dentry * trap; 2416 struct dentry * trap;
2323 struct nameidata oldnd, newnd; 2417 struct nameidata oldnd, newnd;
2324 2418
2325 error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); 2419 error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
2326 if (error) 2420 if (error)
2327 goto exit; 2421 goto exit;
2328 2422
2329 error = path_lookup(newname, LOOKUP_PARENT, &newnd); 2423 error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd);
2330 if (error) 2424 if (error)
2331 goto exit1; 2425 goto exit1;
2332 2426
@@ -2390,7 +2484,8 @@ exit:
2390 return error; 2484 return error;
2391} 2485}
2392 2486
2393asmlinkage long sys_rename(const char __user * oldname, const char __user * newname) 2487asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2488 int newdfd, const char __user *newname)
2394{ 2489{
2395 int error; 2490 int error;
2396 char * from; 2491 char * from;
@@ -2402,13 +2497,18 @@ asmlinkage long sys_rename(const char __user * oldname, const char __user * newn
2402 to = getname(newname); 2497 to = getname(newname);
2403 error = PTR_ERR(to); 2498 error = PTR_ERR(to);
2404 if (!IS_ERR(to)) { 2499 if (!IS_ERR(to)) {
2405 error = do_rename(from,to); 2500 error = do_rename(olddfd, from, newdfd, to);
2406 putname(to); 2501 putname(to);
2407 } 2502 }
2408 putname(from); 2503 putname(from);
2409 return error; 2504 return error;
2410} 2505}
2411 2506
2507asmlinkage long sys_rename(const char __user *oldname, const char __user *newname)
2508{
2509 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
2510}
2511
2412int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2512int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
2413{ 2513{
2414 int len; 2514 int len;
@@ -2552,6 +2652,7 @@ struct inode_operations page_symlink_inode_operations = {
2552}; 2652};
2553 2653
2554EXPORT_SYMBOL(__user_walk); 2654EXPORT_SYMBOL(__user_walk);
2655EXPORT_SYMBOL(__user_walk_fd);
2555EXPORT_SYMBOL(follow_down); 2656EXPORT_SYMBOL(follow_down);
2556EXPORT_SYMBOL(follow_up); 2657EXPORT_SYMBOL(follow_up);
2557EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2658EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
diff --git a/fs/namespace.c b/fs/namespace.c
index 2019899f2ab8..ce97becff461 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -16,6 +16,7 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/acct.h> 18#include <linux/acct.h>
19#include <linux/capability.h>
19#include <linux/module.h> 20#include <linux/module.h>
20#include <linux/seq_file.h> 21#include <linux/seq_file.h>
21#include <linux/namespace.h> 22#include <linux/namespace.h>
@@ -47,6 +48,10 @@ static int hash_mask __read_mostly, hash_bits __read_mostly;
47static kmem_cache_t *mnt_cache; 48static kmem_cache_t *mnt_cache;
48static struct rw_semaphore namespace_sem; 49static struct rw_semaphore namespace_sem;
49 50
51/* /sys/fs */
52decl_subsys(fs, NULL, NULL);
53EXPORT_SYMBOL_GPL(fs_subsys);
54
50static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 55static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
51{ 56{
52 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); 57 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -355,14 +360,14 @@ static int show_vfsmnt(struct seq_file *m, void *v)
355 { MS_SYNCHRONOUS, ",sync" }, 360 { MS_SYNCHRONOUS, ",sync" },
356 { MS_DIRSYNC, ",dirsync" }, 361 { MS_DIRSYNC, ",dirsync" },
357 { MS_MANDLOCK, ",mand" }, 362 { MS_MANDLOCK, ",mand" },
358 { MS_NOATIME, ",noatime" },
359 { MS_NODIRATIME, ",nodiratime" },
360 { 0, NULL } 363 { 0, NULL }
361 }; 364 };
362 static struct proc_fs_info mnt_info[] = { 365 static struct proc_fs_info mnt_info[] = {
363 { MNT_NOSUID, ",nosuid" }, 366 { MNT_NOSUID, ",nosuid" },
364 { MNT_NODEV, ",nodev" }, 367 { MNT_NODEV, ",nodev" },
365 { MNT_NOEXEC, ",noexec" }, 368 { MNT_NOEXEC, ",noexec" },
369 { MNT_NOATIME, ",noatime" },
370 { MNT_NODIRATIME, ",nodiratime" },
366 { 0, NULL } 371 { 0, NULL }
367 }; 372 };
368 struct proc_fs_info *fs_infop; 373 struct proc_fs_info *fs_infop;
@@ -451,7 +456,7 @@ EXPORT_SYMBOL(may_umount);
451void release_mounts(struct list_head *head) 456void release_mounts(struct list_head *head)
452{ 457{
453 struct vfsmount *mnt; 458 struct vfsmount *mnt;
454 while(!list_empty(head)) { 459 while (!list_empty(head)) {
455 mnt = list_entry(head->next, struct vfsmount, mnt_hash); 460 mnt = list_entry(head->next, struct vfsmount, mnt_hash);
456 list_del_init(&mnt->mnt_hash); 461 list_del_init(&mnt->mnt_hash);
457 if (mnt->mnt_parent != mnt) { 462 if (mnt->mnt_parent != mnt) {
@@ -814,7 +819,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
814 return -ENOTDIR; 819 return -ENOTDIR;
815 820
816 err = -ENOENT; 821 err = -ENOENT;
817 down(&nd->dentry->d_inode->i_sem); 822 mutex_lock(&nd->dentry->d_inode->i_mutex);
818 if (IS_DEADDIR(nd->dentry->d_inode)) 823 if (IS_DEADDIR(nd->dentry->d_inode))
819 goto out_unlock; 824 goto out_unlock;
820 825
@@ -826,7 +831,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
826 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) 831 if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
827 err = attach_recursive_mnt(mnt, nd, NULL); 832 err = attach_recursive_mnt(mnt, nd, NULL);
828out_unlock: 833out_unlock:
829 up(&nd->dentry->d_inode->i_sem); 834 mutex_unlock(&nd->dentry->d_inode->i_mutex);
830 if (!err) 835 if (!err)
831 security_sb_post_addmount(mnt, nd); 836 security_sb_post_addmount(mnt, nd);
832 return err; 837 return err;
@@ -962,7 +967,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
962 goto out; 967 goto out;
963 968
964 err = -ENOENT; 969 err = -ENOENT;
965 down(&nd->dentry->d_inode->i_sem); 970 mutex_lock(&nd->dentry->d_inode->i_mutex);
966 if (IS_DEADDIR(nd->dentry->d_inode)) 971 if (IS_DEADDIR(nd->dentry->d_inode))
967 goto out1; 972 goto out1;
968 973
@@ -1004,7 +1009,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
1004 list_del_init(&old_nd.mnt->mnt_expire); 1009 list_del_init(&old_nd.mnt->mnt_expire);
1005 spin_unlock(&vfsmount_lock); 1010 spin_unlock(&vfsmount_lock);
1006out1: 1011out1:
1007 up(&nd->dentry->d_inode->i_sem); 1012 mutex_unlock(&nd->dentry->d_inode->i_mutex);
1008out: 1013out:
1009 up_write(&namespace_sem); 1014 up_write(&namespace_sem);
1010 if (!err) 1015 if (!err)
@@ -1286,7 +1291,13 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1286 mnt_flags |= MNT_NODEV; 1291 mnt_flags |= MNT_NODEV;
1287 if (flags & MS_NOEXEC) 1292 if (flags & MS_NOEXEC)
1288 mnt_flags |= MNT_NOEXEC; 1293 mnt_flags |= MNT_NOEXEC;
1289 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE); 1294 if (flags & MS_NOATIME)
1295 mnt_flags |= MNT_NOATIME;
1296 if (flags & MS_NODIRATIME)
1297 mnt_flags |= MNT_NODIRATIME;
1298
1299 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1300 MS_NOATIME | MS_NODIRATIME);
1290 1301
1291 /* ... and get the mountpoint */ 1302 /* ... and get the mountpoint */
1292 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); 1303 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1526,6 +1537,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
1526 * pointed to by put_old must yield the same directory as new_root. No other 1537 * pointed to by put_old must yield the same directory as new_root. No other
1527 * file system may be mounted on put_old. After all, new_root is a mountpoint. 1538 * file system may be mounted on put_old. After all, new_root is a mountpoint.
1528 * 1539 *
1540 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
1541 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
1542 * in this situation.
1543 *
1529 * Notes: 1544 * Notes:
1530 * - we don't move root/cwd if they are not at the root (reason: if something 1545 * - we don't move root/cwd if they are not at the root (reason: if something
1531 * cared enough to change them, it's probably wrong to force them elsewhere) 1546 * cared enough to change them, it's probably wrong to force them elsewhere)
@@ -1569,7 +1584,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
1569 user_nd.dentry = dget(current->fs->root); 1584 user_nd.dentry = dget(current->fs->root);
1570 read_unlock(&current->fs->lock); 1585 read_unlock(&current->fs->lock);
1571 down_write(&namespace_sem); 1586 down_write(&namespace_sem);
1572 down(&old_nd.dentry->d_inode->i_sem); 1587 mutex_lock(&old_nd.dentry->d_inode->i_mutex);
1573 error = -EINVAL; 1588 error = -EINVAL;
1574 if (IS_MNT_SHARED(old_nd.mnt) || 1589 if (IS_MNT_SHARED(old_nd.mnt) ||
1575 IS_MNT_SHARED(new_nd.mnt->mnt_parent) || 1590 IS_MNT_SHARED(new_nd.mnt->mnt_parent) ||
@@ -1622,7 +1637,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
1622 path_release(&root_parent); 1637 path_release(&root_parent);
1623 path_release(&parent_nd); 1638 path_release(&parent_nd);
1624out2: 1639out2:
1625 up(&old_nd.dentry->d_inode->i_sem); 1640 mutex_unlock(&old_nd.dentry->d_inode->i_mutex);
1626 up_write(&namespace_sem); 1641 up_write(&namespace_sem);
1627 path_release(&user_nd); 1642 path_release(&user_nd);
1628 path_release(&old_nd); 1643 path_release(&old_nd);
@@ -1714,6 +1729,7 @@ void __init mnt_init(unsigned long mempages)
1714 i--; 1729 i--;
1715 } while (i); 1730 } while (i);
1716 sysfs_init(); 1731 sysfs_init();
1732 subsystem_register(&fs_subsys);
1717 init_rootfs(); 1733 init_rootfs();
1718 init_mount_tree(); 1734 init_mount_tree();
1719} 1735}
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index a9f7a8ab1d59..cfd76f431dc0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
365 spin_lock(&dcache_lock); 365 spin_lock(&dcache_lock);
366 next = parent->d_subdirs.next; 366 next = parent->d_subdirs.next;
367 while (next != &parent->d_subdirs) { 367 while (next != &parent->d_subdirs) {
368 dent = list_entry(next, struct dentry, d_child); 368 dent = list_entry(next, struct dentry, d_u.d_child);
369 if ((unsigned long)dent->d_fsdata == fpos) { 369 if ((unsigned long)dent->d_fsdata == fpos) {
370 if (dent->d_inode) 370 if (dent->d_inode)
371 dget_locked(dent); 371 dget_locked(dent);
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 4947d9b11fc1..973b444d6914 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -262,7 +262,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
262 } 262 }
263 vfree(bouncebuffer); 263 vfree(bouncebuffer);
264 264
265 inode_update_time(inode, 1); 265 file_update_time(file);
266 266
267 *ppos = pos; 267 *ppos = pos;
268 268
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8c8839203cd5..d277a58bd128 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -716,10 +716,8 @@ static void ncp_put_super(struct super_block *sb)
716 fput(server->ncp_filp); 716 fput(server->ncp_filp);
717 kill_proc(server->m.wdog_pid, SIGTERM, 1); 717 kill_proc(server->m.wdog_pid, SIGTERM, 1);
718 718
719 if (server->priv.data) 719 kfree(server->priv.data);
720 ncp_kfree_s(server->priv.data, server->priv.len); 720 kfree(server->auth.object_name);
721 if (server->auth.object_name)
722 ncp_kfree_s(server->auth.object_name, server->auth.object_name_len);
723 vfree(server->packet); 721 vfree(server->packet);
724 sb->s_fs_info = NULL; 722 sb->s_fs_info = NULL;
725 kfree(server); 723 kfree(server);
@@ -958,11 +956,6 @@ out:
958 return result; 956 return result;
959} 957}
960 958
961#ifdef DEBUG_NCP_MALLOC
962int ncp_malloced;
963int ncp_current_malloced;
964#endif
965
966static struct super_block *ncp_get_sb(struct file_system_type *fs_type, 959static struct super_block *ncp_get_sb(struct file_system_type *fs_type,
967 int flags, const char *dev_name, void *data) 960 int flags, const char *dev_name, void *data)
968{ 961{
@@ -981,10 +974,6 @@ static int __init init_ncp_fs(void)
981 int err; 974 int err;
982 DPRINTK("ncpfs: init_module called\n"); 975 DPRINTK("ncpfs: init_module called\n");
983 976
984#ifdef DEBUG_NCP_MALLOC
985 ncp_malloced = 0;
986 ncp_current_malloced = 0;
987#endif
988 err = init_inodecache(); 977 err = init_inodecache();
989 if (err) 978 if (err)
990 goto out1; 979 goto out1;
@@ -1003,10 +992,6 @@ static void __exit exit_ncp_fs(void)
1003 DPRINTK("ncpfs: cleanup_module called\n"); 992 DPRINTK("ncpfs: cleanup_module called\n");
1004 unregister_filesystem(&ncp_fs_type); 993 unregister_filesystem(&ncp_fs_type);
1005 destroy_inodecache(); 994 destroy_inodecache();
1006#ifdef DEBUG_NCP_MALLOC
1007 PRINTK("ncp_malloced: %d\n", ncp_malloced);
1008 PRINTK("ncp_current_malloced: %d\n", ncp_current_malloced);
1009#endif
1010} 995}
1011 996
1012module_init(init_ncp_fs) 997module_init(init_ncp_fs)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index fd3efdca5ae3..eb3813ad136f 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -10,6 +10,7 @@
10#include <linux/config.h> 10#include <linux/config.h>
11 11
12#include <asm/uaccess.h> 12#include <asm/uaccess.h>
13#include <linux/capability.h>
13#include <linux/errno.h> 14#include <linux/errno.h>
14#include <linux/fs.h> 15#include <linux/fs.h>
15#include <linux/ioctl.h> 16#include <linux/ioctl.h>
@@ -517,10 +518,11 @@ outrel:
517 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) 518 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
518 return -ENOMEM; 519 return -ENOMEM;
519 if (user.object_name_len) { 520 if (user.object_name_len) {
520 newname = ncp_kmalloc(user.object_name_len, GFP_USER); 521 newname = kmalloc(user.object_name_len, GFP_USER);
521 if (!newname) return -ENOMEM; 522 if (!newname)
523 return -ENOMEM;
522 if (copy_from_user(newname, user.object_name, user.object_name_len)) { 524 if (copy_from_user(newname, user.object_name, user.object_name_len)) {
523 ncp_kfree_s(newname, user.object_name_len); 525 kfree(newname);
524 return -EFAULT; 526 return -EFAULT;
525 } 527 }
526 } else { 528 } else {
@@ -539,8 +541,8 @@ outrel:
539 server->priv.len = 0; 541 server->priv.len = 0;
540 server->priv.data = NULL; 542 server->priv.data = NULL;
541 /* leave critical section */ 543 /* leave critical section */
542 if (oldprivate) ncp_kfree_s(oldprivate, oldprivatelen); 544 kfree(oldprivate);
543 if (oldname) ncp_kfree_s(oldname, oldnamelen); 545 kfree(oldname);
544 return 0; 546 return 0;
545 } 547 }
546 case NCP_IOC_GETPRIVATEDATA: 548 case NCP_IOC_GETPRIVATEDATA:
@@ -580,10 +582,11 @@ outrel:
580 if (user.len > NCP_PRIVATE_DATA_MAX_LEN) 582 if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
581 return -ENOMEM; 583 return -ENOMEM;
582 if (user.len) { 584 if (user.len) {
583 new = ncp_kmalloc(user.len, GFP_USER); 585 new = kmalloc(user.len, GFP_USER);
584 if (!new) return -ENOMEM; 586 if (!new)
587 return -ENOMEM;
585 if (copy_from_user(new, user.data, user.len)) { 588 if (copy_from_user(new, user.data, user.len)) {
586 ncp_kfree_s(new, user.len); 589 kfree(new);
587 return -EFAULT; 590 return -EFAULT;
588 } 591 }
589 } else { 592 } else {
@@ -595,7 +598,7 @@ outrel:
595 server->priv.len = user.len; 598 server->priv.len = user.len;
596 server->priv.data = new; 599 server->priv.data = new;
597 /* leave critical section */ 600 /* leave critical section */
598 if (old) ncp_kfree_s(old, oldlen); 601 kfree(old);
599 return 0; 602 return 0;
600 } 603 }
601 604
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 9e4dc30c2435..799e5c2bec55 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent)
196 spin_lock(&dcache_lock); 196 spin_lock(&dcache_lock);
197 next = parent->d_subdirs.next; 197 next = parent->d_subdirs.next;
198 while (next != &parent->d_subdirs) { 198 while (next != &parent->d_subdirs) {
199 dentry = list_entry(next, struct dentry, d_child); 199 dentry = list_entry(next, struct dentry, d_u.d_child);
200 200
201 if (dentry->d_fsdata == NULL) 201 if (dentry->d_fsdata == NULL)
202 ncp_age_dentry(server, dentry); 202 ncp_age_dentry(server, dentry);
@@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
218 spin_lock(&dcache_lock); 218 spin_lock(&dcache_lock);
219 next = parent->d_subdirs.next; 219 next = parent->d_subdirs.next;
220 while (next != &parent->d_subdirs) { 220 while (next != &parent->d_subdirs) {
221 dentry = list_entry(next, struct dentry, d_child); 221 dentry = list_entry(next, struct dentry, d_u.d_child);
222 dentry->d_fsdata = NULL; 222 dentry->d_fsdata = NULL;
223 ncp_age_dentry(server, dentry); 223 ncp_age_dentry(server, dentry);
224 next = next->next; 224 next = next->next;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e9255198f767..a1554bead692 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -194,7 +194,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
194 spin_unlock(&inode->i_lock); 194 spin_unlock(&inode->i_lock);
195 /* Ensure consistent page alignment of the data. 195 /* Ensure consistent page alignment of the data.
196 * Note: assumes we have exclusive access to this mapping either 196 * Note: assumes we have exclusive access to this mapping either
197 * through inode->i_sem or some other mechanism. 197 * through inode->i_mutex or some other mechanism.
198 */ 198 */
199 if (page->index == 0) 199 if (page->index == 0)
200 invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); 200 invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
@@ -573,7 +573,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
573 573
574loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) 574loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
575{ 575{
576 down(&filp->f_dentry->d_inode->i_sem); 576 mutex_lock(&filp->f_dentry->d_inode->i_mutex);
577 switch (origin) { 577 switch (origin) {
578 case 1: 578 case 1:
579 offset += filp->f_pos; 579 offset += filp->f_pos;
@@ -589,7 +589,7 @@ loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
589 ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; 589 ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0;
590 } 590 }
591out: 591out:
592 up(&filp->f_dentry->d_inode->i_sem); 592 mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
593 return offset; 593 return offset;
594} 594}
595 595
@@ -1001,7 +1001,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1001 openflags &= ~(O_CREAT|O_TRUNC); 1001 openflags &= ~(O_CREAT|O_TRUNC);
1002 1002
1003 /* 1003 /*
1004 * Note: we're not holding inode->i_sem and so may be racing with 1004 * Note: we're not holding inode->i_mutex and so may be racing with
1005 * operations that change the directory. We therefore save the 1005 * operations that change the directory. We therefore save the
1006 * change attribute *before* we do the RPC call. 1006 * change attribute *before* we do the RPC call.
1007 */ 1007 */
@@ -1051,7 +1051,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
1051 return dentry; 1051 return dentry;
1052 if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) 1052 if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
1053 return NULL; 1053 return NULL;
1054 /* Note: caller is already holding the dir->i_sem! */ 1054 /* Note: caller is already holding the dir->i_mutex! */
1055 dentry = d_alloc(parent, &name); 1055 dentry = d_alloc(parent, &name);
1056 if (dentry == NULL) 1056 if (dentry == NULL)
1057 return NULL; 1057 return NULL;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e7bd0d92600f..a77ee95b7efb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -644,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping)
644 if (mapping->nrpages == 0) 644 if (mapping->nrpages == 0)
645 return 0; 645 return 0;
646 unmap_mapping_range(mapping, 0, 0, 0); 646 unmap_mapping_range(mapping, 0, 0, 0);
647 ret = filemap_fdatawrite(mapping); 647 ret = filemap_write_and_wait(mapping);
648 if (ret != 0)
649 goto out;
650 ret = filemap_fdatawait(mapping);
651 if (ret != 0) 648 if (ret != 0)
652 goto out; 649 goto out;
653 ret = nfs_wb_all(mapping->host); 650 ret = nfs_wb_all(mapping->host);
@@ -864,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
864 nfs_begin_data_update(inode); 861 nfs_begin_data_update(inode);
865 /* Write all dirty data if we're changing file permissions or size */ 862 /* Write all dirty data if we're changing file permissions or size */
866 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { 863 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
867 if (filemap_fdatawrite(inode->i_mapping) == 0) 864 filemap_write_and_wait(inode->i_mapping);
868 filemap_fdatawait(inode->i_mapping);
869 nfs_wb_all(inode); 865 nfs_wb_all(inode);
870 } 866 }
871 /* 867 /*
@@ -954,11 +950,20 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
954 950
955 /* Flush out writes to the server in order to update c/mtime */ 951 /* Flush out writes to the server in order to update c/mtime */
956 nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); 952 nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
957 if (__IS_FLG(inode, MS_NOATIME)) 953
958 need_atime = 0; 954 /*
959 else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 955 * We may force a getattr if the user cares about atime.
956 *
957 * Note that we only have to check the vfsmount flags here:
958 * - NFS always sets S_NOATIME by so checking it would give a
959 * bogus result
960 * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is
961 * no point in checking those.
962 */
963 if ((mnt->mnt_flags & MNT_NOATIME) ||
964 ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
960 need_atime = 0; 965 need_atime = 0;
961 /* We may force a getattr if the user cares about atime */ 966
962 if (need_atime) 967 if (need_atime)
963 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 968 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
964 else 969 else
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 985cc53b8dd5..e897e00c2c9d 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -275,7 +275,9 @@ static int __init root_nfs_parse(char *name, char *buf)
275 case Opt_noacl: 275 case Opt_noacl:
276 nfs_data.flags |= NFS_MOUNT_NOACL; 276 nfs_data.flags |= NFS_MOUNT_NOACL;
277 break; 277 break;
278 default : 278 default:
279 printk(KERN_WARNING "Root-NFS: unknown "
280 "option: %s\n", p);
279 return 0; 281 return 0;
280 } 282 }
281 } 283 }
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 0b14938b5b62..0d4cf9486068 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -5,6 +5,7 @@
5 * 5 *
6 */ 6 */
7#include <linux/config.h> 7#include <linux/config.h>
8#include <linux/types.h>
8#include <linux/file.h> 9#include <linux/file.h>
9#include <linux/fs.h> 10#include <linux/fs.h>
10#include <linux/sunrpc/svc.h> 11#include <linux/sunrpc/svc.h>
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 361b4007d4a0..a00fe8686293 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -192,6 +192,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
192 } 192 }
193 if (status) 193 if (status)
194 goto out; 194 goto out;
195
196 /* Openowner is now set, so sequence id will get bumped. Now we need
197 * these checks before we do any creates: */
198 if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
199 return nfserr_grace;
200 if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
201 return nfserr_no_grace;
202
195 switch (open->op_claim_type) { 203 switch (open->op_claim_type) {
196 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 204 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
197 status = nfserr_inval; 205 status = nfserr_inval;
@@ -210,6 +218,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
210 goto out; 218 goto out;
211 break; 219 break;
212 case NFS4_OPEN_CLAIM_PREVIOUS: 220 case NFS4_OPEN_CLAIM_PREVIOUS:
221 open->op_stateowner->so_confirmed = 1;
213 /* 222 /*
214 * The CURRENT_FH is already set to the file being 223 * The CURRENT_FH is already set to the file being
215 * opened. (1) set open->op_cinfo, (2) set 224 * opened. (1) set open->op_cinfo, (2) set
@@ -221,6 +230,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
221 goto out; 230 goto out;
222 break; 231 break;
223 case NFS4_OPEN_CLAIM_DELEGATE_PREV: 232 case NFS4_OPEN_CLAIM_DELEGATE_PREV:
233 open->op_stateowner->so_confirmed = 1;
224 printk("NFSD: unsupported OPEN claim type %d\n", 234 printk("NFSD: unsupported OPEN claim type %d\n",
225 open->op_claim_type); 235 open->op_claim_type);
226 status = nfserr_notsupp; 236 status = nfserr_notsupp;
@@ -584,31 +594,23 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
584{ 594{
585 int status = nfs_ok; 595 int status = nfs_ok;
586 596
587 if (!current_fh->fh_dentry)
588 return nfserr_nofilehandle;
589
590 status = nfs_ok;
591 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 597 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
592 nfs4_lock_state(); 598 nfs4_lock_state();
593 if ((status = nfs4_preprocess_stateid_op(current_fh, 599 status = nfs4_preprocess_stateid_op(current_fh,
594 &setattr->sa_stateid, 600 &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL);
595 CHECK_FH | WR_STATE, NULL))) {
596 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
597 goto out_unlock;
598 }
599 nfs4_unlock_state(); 601 nfs4_unlock_state();
602 if (status) {
603 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!");
604 return status;
605 }
600 } 606 }
601 status = nfs_ok; 607 status = nfs_ok;
602 if (setattr->sa_acl != NULL) 608 if (setattr->sa_acl != NULL)
603 status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl); 609 status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl);
604 if (status) 610 if (status)
605 goto out; 611 return status;
606 status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, 612 status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr,
607 0, (time_t)0); 613 0, (time_t)0);
608out:
609 return status;
610out_unlock:
611 nfs4_unlock_state();
612 return status; 614 return status;
613} 615}
614 616
@@ -626,15 +628,17 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
626 return nfserr_inval; 628 return nfserr_inval;
627 629
628 nfs4_lock_state(); 630 nfs4_lock_state();
629 if ((status = nfs4_preprocess_stateid_op(current_fh, stateid, 631 status = nfs4_preprocess_stateid_op(current_fh, stateid,
630 CHECK_FH | WR_STATE, &filp))) { 632 CHECK_FH | WR_STATE, &filp);
631 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
632 goto out;
633 }
634 if (filp) 633 if (filp)
635 get_file(filp); 634 get_file(filp);
636 nfs4_unlock_state(); 635 nfs4_unlock_state();
637 636
637 if (status) {
638 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
639 return status;
640 }
641
638 write->wr_bytes_written = write->wr_buflen; 642 write->wr_bytes_written = write->wr_buflen;
639 write->wr_how_written = write->wr_stable_how; 643 write->wr_how_written = write->wr_stable_how;
640 p = (u32 *)write->wr_verifier.data; 644 p = (u32 *)write->wr_verifier.data;
@@ -650,9 +654,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
650 if (status == nfserr_symlink) 654 if (status == nfserr_symlink)
651 status = nfserr_inval; 655 status = nfserr_inval;
652 return status; 656 return status;
653out:
654 nfs4_unlock_state();
655 return status;
656} 657}
657 658
658/* This routine never returns NFS_OK! If there are no other errors, it 659/* This routine never returns NFS_OK! If there are no other errors, it
@@ -768,6 +769,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
768 while (!status && resp->opcnt < args->opcnt) { 769 while (!status && resp->opcnt < args->opcnt) {
769 op = &args->ops[resp->opcnt++]; 770 op = &args->ops[resp->opcnt++];
770 771
772 dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum);
773
771 /* 774 /*
772 * The XDR decode routines may have pre-set op->status; 775 * The XDR decode routines may have pre-set op->status;
773 * for example, if there is a miscellaneous XDR error 776 * for example, if there is a miscellaneous XDR error
@@ -792,17 +795,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
792 /* All operations except RENEW, SETCLIENTID, RESTOREFH 795 /* All operations except RENEW, SETCLIENTID, RESTOREFH
793 * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH 796 * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
794 * require a valid current filehandle 797 * require a valid current filehandle
795 *
796 * SETATTR NOFILEHANDLE error handled in nfsd4_setattr
797 * due to required returned bitmap argument
798 */ 798 */
799 if ((!current_fh->fh_dentry) && 799 if ((!current_fh->fh_dentry) &&
800 !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || 800 !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
801 (op->opnum == OP_SETCLIENTID) || 801 (op->opnum == OP_SETCLIENTID) ||
802 (op->opnum == OP_SETCLIENTID_CONFIRM) || 802 (op->opnum == OP_SETCLIENTID_CONFIRM) ||
803 (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || 803 (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
804 (op->opnum == OP_RELEASE_LOCKOWNER) || 804 (op->opnum == OP_RELEASE_LOCKOWNER))) {
805 (op->opnum == OP_SETATTR))) {
806 op->status = nfserr_nofilehandle; 805 op->status = nfserr_nofilehandle;
807 goto encode_op; 806 goto encode_op;
808 } 807 }
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 954cf893d50c..06da7506363c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -121,9 +121,9 @@ out:
121static void 121static void
122nfsd4_sync_rec_dir(void) 122nfsd4_sync_rec_dir(void)
123{ 123{
124 down(&rec_dir.dentry->d_inode->i_sem); 124 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
125 nfsd_sync_dir(rec_dir.dentry); 125 nfsd_sync_dir(rec_dir.dentry);
126 up(&rec_dir.dentry->d_inode->i_sem); 126 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
127} 127}
128 128
129int 129int
@@ -143,7 +143,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
143 nfs4_save_user(&uid, &gid); 143 nfs4_save_user(&uid, &gid);
144 144
145 /* lock the parent */ 145 /* lock the parent */
146 down(&rec_dir.dentry->d_inode->i_sem); 146 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
147 147
148 dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); 148 dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
149 if (IS_ERR(dentry)) { 149 if (IS_ERR(dentry)) {
@@ -159,7 +159,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
159out_put: 159out_put:
160 dput(dentry); 160 dput(dentry);
161out_unlock: 161out_unlock:
162 up(&rec_dir.dentry->d_inode->i_sem); 162 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
163 if (status == 0) { 163 if (status == 0) {
164 clp->cl_firststate = 1; 164 clp->cl_firststate = 1;
165 nfsd4_sync_rec_dir(); 165 nfsd4_sync_rec_dir();
@@ -222,8 +222,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
222 222
223 nfs4_save_user(&uid, &gid); 223 nfs4_save_user(&uid, &gid);
224 224
225 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), 225 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
226 O_RDWR);
227 status = PTR_ERR(filp); 226 status = PTR_ERR(filp);
228 if (IS_ERR(filp)) 227 if (IS_ERR(filp))
229 goto out; 228 goto out;
@@ -259,9 +258,9 @@ nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
259 printk("nfsd4: non-file found in client recovery directory\n"); 258 printk("nfsd4: non-file found in client recovery directory\n");
260 return -EINVAL; 259 return -EINVAL;
261 } 260 }
262 down(&dir->d_inode->i_sem); 261 mutex_lock(&dir->d_inode->i_mutex);
263 status = vfs_unlink(dir->d_inode, dentry); 262 status = vfs_unlink(dir->d_inode, dentry);
264 up(&dir->d_inode->i_sem); 263 mutex_unlock(&dir->d_inode->i_mutex);
265 return status; 264 return status;
266} 265}
267 266
@@ -274,9 +273,9 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
274 * any regular files anyway, just in case the directory was created by 273 * any regular files anyway, just in case the directory was created by
275 * a kernel from the future.... */ 274 * a kernel from the future.... */
276 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); 275 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
277 down(&dir->d_inode->i_sem); 276 mutex_lock(&dir->d_inode->i_mutex);
278 status = vfs_rmdir(dir->d_inode, dentry); 277 status = vfs_rmdir(dir->d_inode, dentry);
279 up(&dir->d_inode->i_sem); 278 mutex_unlock(&dir->d_inode->i_mutex);
280 return status; 279 return status;
281} 280}
282 281
@@ -288,9 +287,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
288 287
289 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); 288 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
290 289
291 down(&rec_dir.dentry->d_inode->i_sem); 290 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
292 dentry = lookup_one_len(name, rec_dir.dentry, namlen); 291 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
293 up(&rec_dir.dentry->d_inode->i_sem); 292 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
294 if (IS_ERR(dentry)) { 293 if (IS_ERR(dentry)) {
295 status = PTR_ERR(dentry); 294 status = PTR_ERR(dentry);
296 return status; 295 return status;
@@ -400,9 +399,10 @@ nfsd4_init_recdir(char *rec_dirname)
400 399
401 nfs4_save_user(&uid, &gid); 400 nfs4_save_user(&uid, &gid);
402 401
403 status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir); 402 status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
404 if (status == -ENOENT) 403 &rec_dir);
405 printk("NFSD: recovery directory %s doesn't exist\n", 404 if (status)
405 printk("NFSD: unable to find recovery directory %s\n",
406 rec_dirname); 406 rec_dirname);
407 407
408 if (!status) 408 if (!status)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6bbefd06f10d..1143cfb64549 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1088,7 +1088,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
1088 sop->so_seqid = open->op_seqid; 1088 sop->so_seqid = open->op_seqid;
1089 sop->so_confirmed = 0; 1089 sop->so_confirmed = 0;
1090 rp = &sop->so_replay; 1090 rp = &sop->so_replay;
1091 rp->rp_status = NFSERR_SERVERFAULT; 1091 rp->rp_status = nfserr_serverfault;
1092 rp->rp_buflen = 0; 1092 rp->rp_buflen = 0;
1093 rp->rp_buf = rp->rp_ibuf; 1093 rp->rp_buf = rp->rp_ibuf;
1094 return sop; 1094 return sop;
@@ -1178,7 +1178,6 @@ release_stateid(struct nfs4_stateid *stp, int flags)
1178 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); 1178 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
1179 put_nfs4_file(stp->st_file); 1179 put_nfs4_file(stp->st_file);
1180 kmem_cache_free(stateid_slab, stp); 1180 kmem_cache_free(stateid_slab, stp);
1181 stp = NULL;
1182} 1181}
1183 1182
1184static void 1183static void
@@ -1191,22 +1190,6 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1191 sop->so_time = get_seconds(); 1190 sop->so_time = get_seconds();
1192} 1191}
1193 1192
1194static void
1195release_state_owner(struct nfs4_stateid *stp, int flag)
1196{
1197 struct nfs4_stateowner *sop = stp->st_stateowner;
1198
1199 dprintk("NFSD: release_state_owner\n");
1200 release_stateid(stp, flag);
1201
1202 /* place unused nfs4_stateowners on so_close_lru list to be
1203 * released by the laundromat service after the lease period
1204 * to enable us to handle CLOSE replay
1205 */
1206 if (sop->so_confirmed && list_empty(&sop->so_stateids))
1207 move_to_close_lru(sop);
1208}
1209
1210static int 1193static int
1211cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) { 1194cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) {
1212 return ((sop->so_owner.len == owner->len) && 1195 return ((sop->so_owner.len == owner->len) &&
@@ -1446,92 +1429,61 @@ static struct lock_manager_operations nfsd_lease_mng_ops = {
1446}; 1429};
1447 1430
1448 1431
1449/*
1450 * nfsd4_process_open1()
1451 * lookup stateowner.
1452 * found:
1453 * check confirmed
1454 * confirmed:
1455 * check seqid
1456 * not confirmed:
1457 * delete owner
1458 * create new owner
1459 * notfound:
1460 * verify clientid
1461 * create new owner
1462 *
1463 * called with nfs4_lock_state() held.
1464 */
1465int 1432int
1466nfsd4_process_open1(struct nfsd4_open *open) 1433nfsd4_process_open1(struct nfsd4_open *open)
1467{ 1434{
1468 int status;
1469 clientid_t *clientid = &open->op_clientid; 1435 clientid_t *clientid = &open->op_clientid;
1470 struct nfs4_client *clp = NULL; 1436 struct nfs4_client *clp = NULL;
1471 unsigned int strhashval; 1437 unsigned int strhashval;
1472 struct nfs4_stateowner *sop = NULL; 1438 struct nfs4_stateowner *sop = NULL;
1473 1439
1474 status = nfserr_inval;
1475 if (!check_name(open->op_owner)) 1440 if (!check_name(open->op_owner))
1476 goto out; 1441 return nfserr_inval;
1477 1442
1478 if (STALE_CLIENTID(&open->op_clientid)) 1443 if (STALE_CLIENTID(&open->op_clientid))
1479 return nfserr_stale_clientid; 1444 return nfserr_stale_clientid;
1480 1445
1481 strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); 1446 strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
1482 sop = find_openstateowner_str(strhashval, open); 1447 sop = find_openstateowner_str(strhashval, open);
1483 if (sop) { 1448 open->op_stateowner = sop;
1484 open->op_stateowner = sop; 1449 if (!sop) {
1485 /* check for replay */ 1450 /* Make sure the client's lease hasn't expired. */
1486 if (open->op_seqid == sop->so_seqid - 1){
1487 if (sop->so_replay.rp_buflen)
1488 return NFSERR_REPLAY_ME;
1489 else {
1490 /* The original OPEN failed so spectacularly
1491 * that we don't even have replay data saved!
1492 * Therefore, we have no choice but to continue
1493 * processing this OPEN; presumably, we'll
1494 * fail again for the same reason.
1495 */
1496 dprintk("nfsd4_process_open1:"
1497 " replay with no replay cache\n");
1498 goto renew;
1499 }
1500 } else if (sop->so_confirmed) {
1501 if (open->op_seqid == sop->so_seqid)
1502 goto renew;
1503 status = nfserr_bad_seqid;
1504 goto out;
1505 } else {
1506 /* If we get here, we received an OPEN for an
1507 * unconfirmed nfs4_stateowner. Since the seqid's are
1508 * different, purge the existing nfs4_stateowner, and
1509 * instantiate a new one.
1510 */
1511 clp = sop->so_client;
1512 release_stateowner(sop);
1513 }
1514 } else {
1515 /* nfs4_stateowner not found.
1516 * Verify clientid and instantiate new nfs4_stateowner.
1517 * If verify fails this is presumably the result of the
1518 * client's lease expiring.
1519 */
1520 status = nfserr_expired;
1521 clp = find_confirmed_client(clientid); 1451 clp = find_confirmed_client(clientid);
1522 if (clp == NULL) 1452 if (clp == NULL)
1523 goto out; 1453 return nfserr_expired;
1454 goto renew;
1524 } 1455 }
1525 status = nfserr_resource; 1456 if (!sop->so_confirmed) {
1526 sop = alloc_init_open_stateowner(strhashval, clp, open); 1457 /* Replace unconfirmed owners without checking for replay. */
1527 if (sop == NULL) 1458 clp = sop->so_client;
1528 goto out; 1459 release_stateowner(sop);
1529 open->op_stateowner = sop; 1460 open->op_stateowner = NULL;
1461 goto renew;
1462 }
1463 if (open->op_seqid == sop->so_seqid - 1) {
1464 if (sop->so_replay.rp_buflen)
1465 return NFSERR_REPLAY_ME;
1466 /* The original OPEN failed so spectacularly
1467 * that we don't even have replay data saved!
1468 * Therefore, we have no choice but to continue
1469 * processing this OPEN; presumably, we'll
1470 * fail again for the same reason.
1471 */
1472 dprintk("nfsd4_process_open1: replay with no replay cache\n");
1473 goto renew;
1474 }
1475 if (open->op_seqid != sop->so_seqid)
1476 return nfserr_bad_seqid;
1530renew: 1477renew:
1531 status = nfs_ok; 1478 if (open->op_stateowner == NULL) {
1479 sop = alloc_init_open_stateowner(strhashval, clp, open);
1480 if (sop == NULL)
1481 return nfserr_resource;
1482 open->op_stateowner = sop;
1483 }
1484 list_del_init(&sop->so_close_lru);
1532 renew_client(sop->so_client); 1485 renew_client(sop->so_client);
1533out: 1486 return nfs_ok;
1534 return status;
1535} 1487}
1536 1488
1537static inline int 1489static inline int
@@ -1648,7 +1600,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
1648 if (!open->op_truncate) 1600 if (!open->op_truncate)
1649 return 0; 1601 return 0;
1650 if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) 1602 if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
1651 return -EINVAL; 1603 return nfserr_inval;
1652 return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); 1604 return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
1653} 1605}
1654 1606
@@ -1657,26 +1609,26 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
1657{ 1609{
1658 struct file *filp = stp->st_vfs_file; 1610 struct file *filp = stp->st_vfs_file;
1659 struct inode *inode = filp->f_dentry->d_inode; 1611 struct inode *inode = filp->f_dentry->d_inode;
1660 unsigned int share_access; 1612 unsigned int share_access, new_writer;
1661 int status; 1613 int status;
1662 1614
1663 set_access(&share_access, stp->st_access_bmap); 1615 set_access(&share_access, stp->st_access_bmap);
1664 share_access = ~share_access; 1616 new_writer = (~share_access) & open->op_share_access
1665 share_access &= open->op_share_access; 1617 & NFS4_SHARE_ACCESS_WRITE;
1666
1667 if (!(share_access & NFS4_SHARE_ACCESS_WRITE))
1668 return nfsd4_truncate(rqstp, cur_fh, open);
1669 1618
1670 status = get_write_access(inode); 1619 if (new_writer) {
1671 if (status) 1620 status = get_write_access(inode);
1672 return nfserrno(status); 1621 if (status)
1622 return nfserrno(status);
1623 }
1673 status = nfsd4_truncate(rqstp, cur_fh, open); 1624 status = nfsd4_truncate(rqstp, cur_fh, open);
1674 if (status) { 1625 if (status) {
1675 put_write_access(inode); 1626 if (new_writer)
1627 put_write_access(inode);
1676 return status; 1628 return status;
1677 } 1629 }
1678 /* remember the open */ 1630 /* remember the open */
1679 filp->f_mode = (filp->f_mode | FMODE_WRITE) & ~FMODE_READ; 1631 filp->f_mode |= open->op_share_access;
1680 set_bit(open->op_share_access, &stp->st_access_bmap); 1632 set_bit(open->op_share_access, &stp->st_access_bmap);
1681 set_bit(open->op_share_deny, &stp->st_deny_bmap); 1633 set_bit(open->op_share_deny, &stp->st_deny_bmap);
1682 1634
@@ -1780,12 +1732,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1780 struct nfs4_delegation *dp = NULL; 1732 struct nfs4_delegation *dp = NULL;
1781 int status; 1733 int status;
1782 1734
1783 if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
1784 return nfserr_grace;
1785
1786 if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
1787 return nfserr_no_grace;
1788
1789 status = nfserr_inval; 1735 status = nfserr_inval;
1790 if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) 1736 if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
1791 goto out; 1737 goto out;
@@ -2423,15 +2369,19 @@ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_clos
2423 CHECK_FH | OPEN_STATE | CLOSE_STATE, 2369 CHECK_FH | OPEN_STATE | CLOSE_STATE,
2424 &close->cl_stateowner, &stp, NULL))) 2370 &close->cl_stateowner, &stp, NULL)))
2425 goto out; 2371 goto out;
2426 /*
2427 * Return success, but first update the stateid.
2428 */
2429 status = nfs_ok; 2372 status = nfs_ok;
2430 update_stateid(&stp->st_stateid); 2373 update_stateid(&stp->st_stateid);
2431 memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); 2374 memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));
2432 2375
2433 /* release_state_owner() calls nfsd_close() if needed */ 2376 /* release_stateid() calls nfsd_close() if needed */
2434 release_state_owner(stp, OPEN_STATE); 2377 release_stateid(stp, OPEN_STATE);
2378
2379 /* place unused nfs4_stateowners on so_close_lru list to be
2380 * released by the laundromat service after the lease period
2381 * to enable us to handle CLOSE replay
2382 */
2383 if (list_empty(&close->cl_stateowner->so_stateids))
2384 move_to_close_lru(close->cl_stateowner);
2435out: 2385out:
2436 if (close->cl_stateowner) { 2386 if (close->cl_stateowner) {
2437 nfs4_get_stateowner(close->cl_stateowner); 2387 nfs4_get_stateowner(close->cl_stateowner);
@@ -2633,7 +2583,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
2633 sop->so_seqid = lock->lk_new_lock_seqid + 1; 2583 sop->so_seqid = lock->lk_new_lock_seqid + 1;
2634 sop->so_confirmed = 1; 2584 sop->so_confirmed = 1;
2635 rp = &sop->so_replay; 2585 rp = &sop->so_replay;
2636 rp->rp_status = NFSERR_SERVERFAULT; 2586 rp->rp_status = nfserr_serverfault;
2637 rp->rp_buflen = 0; 2587 rp->rp_buflen = 0;
2638 rp->rp_buf = rp->rp_ibuf; 2588 rp->rp_buf = rp->rp_ibuf;
2639 return sop; 2589 return sop;
@@ -2700,6 +2650,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2700 if (check_lock_length(lock->lk_offset, lock->lk_length)) 2650 if (check_lock_length(lock->lk_offset, lock->lk_length))
2701 return nfserr_inval; 2651 return nfserr_inval;
2702 2652
2653 if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
2654 dprintk("NFSD: nfsd4_lock: permission denied!\n");
2655 return status;
2656 }
2657
2703 nfs4_lock_state(); 2658 nfs4_lock_state();
2704 2659
2705 if (lock->lk_is_new) { 2660 if (lock->lk_is_new) {
@@ -2720,11 +2675,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2720 lock->lk_new_open_seqid, 2675 lock->lk_new_open_seqid,
2721 &lock->lk_new_open_stateid, 2676 &lock->lk_new_open_stateid,
2722 CHECK_FH | OPEN_STATE, 2677 CHECK_FH | OPEN_STATE,
2723 &lock->lk_stateowner, &open_stp, 2678 &lock->lk_replay_owner, &open_stp,
2724 lock); 2679 lock);
2725 if (status) 2680 if (status)
2726 goto out; 2681 goto out;
2727 open_sop = lock->lk_stateowner; 2682 open_sop = lock->lk_replay_owner;
2728 /* create lockowner and lock stateid */ 2683 /* create lockowner and lock stateid */
2729 fp = open_stp->st_file; 2684 fp = open_stp->st_file;
2730 strhashval = lock_ownerstr_hashval(fp->fi_inode, 2685 strhashval = lock_ownerstr_hashval(fp->fi_inode,
@@ -2739,29 +2694,22 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2739 if (lock_sop == NULL) 2694 if (lock_sop == NULL)
2740 goto out; 2695 goto out;
2741 lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); 2696 lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
2742 if (lock_stp == NULL) { 2697 if (lock_stp == NULL)
2743 release_stateowner(lock_sop);
2744 goto out; 2698 goto out;
2745 }
2746 } else { 2699 } else {
2747 /* lock (lock owner + lock stateid) already exists */ 2700 /* lock (lock owner + lock stateid) already exists */
2748 status = nfs4_preprocess_seqid_op(current_fh, 2701 status = nfs4_preprocess_seqid_op(current_fh,
2749 lock->lk_old_lock_seqid, 2702 lock->lk_old_lock_seqid,
2750 &lock->lk_old_lock_stateid, 2703 &lock->lk_old_lock_stateid,
2751 CHECK_FH | LOCK_STATE, 2704 CHECK_FH | LOCK_STATE,
2752 &lock->lk_stateowner, &lock_stp, lock); 2705 &lock->lk_replay_owner, &lock_stp, lock);
2753 if (status) 2706 if (status)
2754 goto out; 2707 goto out;
2755 lock_sop = lock->lk_stateowner; 2708 lock_sop = lock->lk_replay_owner;
2756 } 2709 }
2757 /* lock->lk_stateowner and lock_stp have been created or found */ 2710 /* lock->lk_replay_owner and lock_stp have been created or found */
2758 filp = lock_stp->st_vfs_file; 2711 filp = lock_stp->st_vfs_file;
2759 2712
2760 if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
2761 dprintk("NFSD: nfsd4_lock: permission denied!\n");
2762 goto out;
2763 }
2764
2765 status = nfserr_grace; 2713 status = nfserr_grace;
2766 if (nfs4_in_grace() && !lock->lk_reclaim) 2714 if (nfs4_in_grace() && !lock->lk_reclaim)
2767 goto out; 2715 goto out;
@@ -2802,8 +2750,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2802 */ 2750 */
2803 2751
2804 status = posix_lock_file(filp, &file_lock); 2752 status = posix_lock_file(filp, &file_lock);
2805 if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
2806 file_lock.fl_ops->fl_release_private(&file_lock);
2807 dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status); 2753 dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status);
2808 switch (-status) { 2754 switch (-status) {
2809 case 0: /* success! */ 2755 case 0: /* success! */
@@ -2815,9 +2761,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2815 goto conflicting_lock; 2761 goto conflicting_lock;
2816 case (EDEADLK): 2762 case (EDEADLK):
2817 status = nfserr_deadlock; 2763 status = nfserr_deadlock;
2764 dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
2765 goto out;
2818 default: 2766 default:
2767 status = nfserrno(status);
2819 dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status); 2768 dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
2820 goto out_destroy_new_stateid; 2769 goto out;
2821 } 2770 }
2822 2771
2823conflicting_lock: 2772conflicting_lock:
@@ -2831,20 +2780,12 @@ conflicting_lock:
2831 goto out; 2780 goto out;
2832 } 2781 }
2833 nfs4_set_lock_denied(conflock, &lock->lk_denied); 2782 nfs4_set_lock_denied(conflock, &lock->lk_denied);
2834
2835out_destroy_new_stateid:
2836 if (lock->lk_is_new) {
2837 dprintk("NFSD: nfsd4_lock: destroy new stateid!\n");
2838 /*
2839 * An error encountered after instantiation of the new
2840 * stateid has forced us to destroy it.
2841 */
2842 release_state_owner(lock_stp, LOCK_STATE);
2843 }
2844out: 2783out:
2845 if (lock->lk_stateowner) { 2784 if (status && lock->lk_is_new && lock_sop)
2846 nfs4_get_stateowner(lock->lk_stateowner); 2785 release_stateowner(lock_sop);
2847 *replay_owner = lock->lk_stateowner; 2786 if (lock->lk_replay_owner) {
2787 nfs4_get_stateowner(lock->lk_replay_owner);
2788 *replay_owner = lock->lk_replay_owner;
2848 } 2789 }
2849 nfs4_unlock_state(); 2790 nfs4_unlock_state();
2850 return status; 2791 return status;
@@ -2977,8 +2918,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2977 * Try to unlock the file in the VFS. 2918 * Try to unlock the file in the VFS.
2978 */ 2919 */
2979 status = posix_lock_file(filp, &file_lock); 2920 status = posix_lock_file(filp, &file_lock);
2980 if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
2981 file_lock.fl_ops->fl_release_private(&file_lock);
2982 if (status) { 2921 if (status) {
2983 dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n"); 2922 dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n");
2984 goto out_nfserr; 2923 goto out_nfserr;
@@ -3016,9 +2955,10 @@ check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
3016 2955
3017 lock_kernel(); 2956 lock_kernel();
3018 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { 2957 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
3019 if ((*flpp)->fl_owner == (fl_owner_t)lowner) 2958 if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
3020 status = 1; 2959 status = 1;
3021 goto out; 2960 goto out;
2961 }
3022 } 2962 }
3023out: 2963out:
3024 unlock_kernel(); 2964 unlock_kernel();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index dcd673186944..69d3501173a8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -528,7 +528,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
528{ 528{
529 DECODE_HEAD; 529 DECODE_HEAD;
530 530
531 lock->lk_stateowner = NULL; 531 lock->lk_replay_owner = NULL;
532 /* 532 /*
533 * type, reclaim(boolean), offset, length, new_lock_owner(boolean) 533 * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
534 */ 534 */
@@ -1764,10 +1764,11 @@ nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
1764 */ 1764 */
1765 if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) 1765 if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
1766 goto fail; 1766 goto fail;
1767 nfserr = nfserr_toosmall;
1768 p = nfsd4_encode_rdattr_error(p, buflen, nfserr); 1767 p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
1769 if (p == NULL) 1768 if (p == NULL) {
1769 nfserr = nfserr_toosmall;
1770 goto fail; 1770 goto fail;
1771 }
1771 } 1772 }
1772 cd->buflen -= (p - cd->buffer); 1773 cd->buflen -= (p - cd->buffer);
1773 cd->buffer = p; 1774 cd->buffer = p;
@@ -1895,7 +1896,6 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
1895static void 1896static void
1896nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock) 1897nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock)
1897{ 1898{
1898
1899 ENCODE_SEQID_OP_HEAD; 1899 ENCODE_SEQID_OP_HEAD;
1900 1900
1901 if (!nfserr) { 1901 if (!nfserr) {
@@ -1906,7 +1906,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock
1906 } else if (nfserr == nfserr_denied) 1906 } else if (nfserr == nfserr_denied)
1907 nfsd4_encode_lock_denied(resp, &lock->lk_denied); 1907 nfsd4_encode_lock_denied(resp, &lock->lk_denied);
1908 1908
1909 ENCODE_SEQID_OP_TAIL(lock->lk_stateowner); 1909 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
1910} 1910}
1911 1911
1912static void 1912static void
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0aa1b9603d7f..3e6b75cd90fd 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -36,6 +36,22 @@ nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
36 return nfs_ok; 36 return nfs_ok;
37} 37}
38 38
39static int
40nfsd_return_attrs(int err, struct nfsd_attrstat *resp)
41{
42 if (err) return err;
43 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
44 resp->fh.fh_dentry,
45 &resp->stat));
46}
47static int
48nfsd_return_dirop(int err, struct nfsd_diropres *resp)
49{
50 if (err) return err;
51 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
52 resp->fh.fh_dentry,
53 &resp->stat));
54}
39/* 55/*
40 * Get a file's attributes 56 * Get a file's attributes
41 * N.B. After this call resp->fh needs an fh_put 57 * N.B. After this call resp->fh needs an fh_put
@@ -44,10 +60,12 @@ static int
44nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, 60nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
45 struct nfsd_attrstat *resp) 61 struct nfsd_attrstat *resp)
46{ 62{
63 int nfserr;
47 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); 64 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
48 65
49 fh_copy(&resp->fh, &argp->fh); 66 fh_copy(&resp->fh, &argp->fh);
50 return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); 67 nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
68 return nfsd_return_attrs(nfserr, resp);
51} 69}
52 70
53/* 71/*
@@ -58,12 +76,14 @@ static int
58nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp, 76nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
59 struct nfsd_attrstat *resp) 77 struct nfsd_attrstat *resp)
60{ 78{
79 int nfserr;
61 dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", 80 dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
62 SVCFH_fmt(&argp->fh), 81 SVCFH_fmt(&argp->fh),
63 argp->attrs.ia_valid, (long) argp->attrs.ia_size); 82 argp->attrs.ia_valid, (long) argp->attrs.ia_size);
64 83
65 fh_copy(&resp->fh, &argp->fh); 84 fh_copy(&resp->fh, &argp->fh);
66 return nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0); 85 nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
86 return nfsd_return_attrs(nfserr, resp);
67} 87}
68 88
69/* 89/*
@@ -86,7 +106,7 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
86 &resp->fh); 106 &resp->fh);
87 107
88 fh_put(&argp->fh); 108 fh_put(&argp->fh);
89 return nfserr; 109 return nfsd_return_dirop(nfserr, resp);
90} 110}
91 111
92/* 112/*
@@ -142,7 +162,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
142 argp->vec, argp->vlen, 162 argp->vec, argp->vlen,
143 &resp->count); 163 &resp->count);
144 164
145 return nfserr; 165 if (nfserr) return nfserr;
166 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_mnt,
167 resp->fh.fh_dentry,
168 &resp->stat));
146} 169}
147 170
148/* 171/*
@@ -165,7 +188,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
165 argp->vec, argp->vlen, 188 argp->vec, argp->vlen,
166 argp->len, 189 argp->len,
167 &stable); 190 &stable);
168 return nfserr; 191 return nfsd_return_attrs(nfserr, resp);
169} 192}
170 193
171/* 194/*
@@ -322,7 +345,7 @@ out_unlock:
322 345
323done: 346done:
324 fh_put(dirfhp); 347 fh_put(dirfhp);
325 return nfserr; 348 return nfsd_return_dirop(nfserr, resp);
326} 349}
327 350
328static int 351static int
@@ -425,7 +448,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
425 nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, 448 nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
426 &argp->attrs, S_IFDIR, 0, &resp->fh); 449 &argp->attrs, S_IFDIR, 0, &resp->fh);
427 fh_put(&argp->fh); 450 fh_put(&argp->fh);
428 return nfserr; 451 return nfsd_return_dirop(nfserr, resp);
429} 452}
430 453
431/* 454/*
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index aa7bb41b293d..e3a0797dd56b 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -37,7 +37,7 @@ static u32 nfs_ftypes[] = {
37/* 37/*
38 * XDR functions for basic NFS types 38 * XDR functions for basic NFS types
39 */ 39 */
40static inline u32 * 40static u32 *
41decode_fh(u32 *p, struct svc_fh *fhp) 41decode_fh(u32 *p, struct svc_fh *fhp)
42{ 42{
43 fh_init(fhp, NFS_FHSIZE); 43 fh_init(fhp, NFS_FHSIZE);
@@ -151,7 +151,7 @@ decode_sattr(u32 *p, struct iattr *iap)
151 return p; 151 return p;
152} 152}
153 153
154static inline u32 * 154static u32 *
155encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, 155encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
156 struct kstat *stat) 156 struct kstat *stat)
157{ 157{
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index df4019f04560..5320e5afaddb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -48,8 +48,8 @@
48#include <linux/fsnotify.h> 48#include <linux/fsnotify.h>
49#include <linux/posix_acl.h> 49#include <linux/posix_acl.h>
50#include <linux/posix_acl_xattr.h> 50#include <linux/posix_acl_xattr.h>
51#ifdef CONFIG_NFSD_V4
52#include <linux/xattr.h> 51#include <linux/xattr.h>
52#ifdef CONFIG_NFSD_V4
53#include <linux/nfs4.h> 53#include <linux/nfs4.h>
54#include <linux/nfs4_acl.h> 54#include <linux/nfs4_acl.h>
55#include <linux/nfsd_idmap.h> 55#include <linux/nfsd_idmap.h>
@@ -365,8 +365,30 @@ out_nfserr:
365 goto out; 365 goto out;
366} 366}
367 367
368#if defined(CONFIG_NFSD_V4) 368#if defined(CONFIG_NFSD_V2_ACL) || \
369 defined(CONFIG_NFSD_V3_ACL) || \
370 defined(CONFIG_NFSD_V4)
371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
372{
373 ssize_t buflen;
374 int error;
375
376 buflen = vfs_getxattr(dentry, key, NULL, 0);
377 if (buflen <= 0)
378 return buflen;
379
380 *buf = kmalloc(buflen, GFP_KERNEL);
381 if (!*buf)
382 return -ENOMEM;
383
384 error = vfs_getxattr(dentry, key, *buf, buflen);
385 if (error < 0)
386 return error;
387 return buflen;
388}
389#endif
369 390
391#if defined(CONFIG_NFSD_V4)
370static int 392static int
371set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) 393set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
372{ 394{
@@ -374,7 +396,6 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
374 size_t buflen; 396 size_t buflen;
375 char *buf = NULL; 397 char *buf = NULL;
376 int error = 0; 398 int error = 0;
377 struct inode *inode = dentry->d_inode;
378 399
379 buflen = posix_acl_xattr_size(pacl->a_count); 400 buflen = posix_acl_xattr_size(pacl->a_count);
380 buf = kmalloc(buflen, GFP_KERNEL); 401 buf = kmalloc(buflen, GFP_KERNEL);
@@ -388,15 +409,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
388 goto out; 409 goto out;
389 } 410 }
390 411
391 error = -EOPNOTSUPP; 412 error = vfs_setxattr(dentry, key, buf, len, 0);
392 if (inode->i_op && inode->i_op->setxattr) {
393 down(&inode->i_sem);
394 security_inode_setxattr(dentry, key, buf, len, 0);
395 error = inode->i_op->setxattr(dentry, key, buf, len, 0);
396 if (!error)
397 security_inode_post_setxattr(dentry, key, buf, len, 0);
398 up(&inode->i_sem);
399 }
400out: 413out:
401 kfree(buf); 414 kfree(buf);
402 return error; 415 return error;
@@ -455,44 +468,19 @@ out_nfserr:
455static struct posix_acl * 468static struct posix_acl *
456_get_posix_acl(struct dentry *dentry, char *key) 469_get_posix_acl(struct dentry *dentry, char *key)
457{ 470{
458 struct inode *inode = dentry->d_inode; 471 void *buf = NULL;
459 char *buf = NULL;
460 int buflen, error = 0;
461 struct posix_acl *pacl = NULL; 472 struct posix_acl *pacl = NULL;
473 int buflen;
462 474
463 error = -EOPNOTSUPP; 475 buflen = nfsd_getxattr(dentry, key, &buf);
464 if (inode->i_op == NULL) 476 if (!buflen)
465 goto out_err; 477 buflen = -ENODATA;
466 if (inode->i_op->getxattr == NULL) 478 if (buflen <= 0)
467 goto out_err; 479 return ERR_PTR(buflen);
468
469 error = security_inode_getxattr(dentry, key);
470 if (error)
471 goto out_err;
472
473 buflen = inode->i_op->getxattr(dentry, key, NULL, 0);
474 if (buflen <= 0) {
475 error = buflen < 0 ? buflen : -ENODATA;
476 goto out_err;
477 }
478
479 buf = kmalloc(buflen, GFP_KERNEL);
480 if (buf == NULL) {
481 error = -ENOMEM;
482 goto out_err;
483 }
484
485 error = inode->i_op->getxattr(dentry, key, buf, buflen);
486 if (error < 0)
487 goto out_err;
488 480
489 pacl = posix_acl_from_xattr(buf, buflen); 481 pacl = posix_acl_from_xattr(buf, buflen);
490 out:
491 kfree(buf); 482 kfree(buf);
492 return pacl; 483 return pacl;
493 out_err:
494 pacl = ERR_PTR(error);
495 goto out;
496} 484}
497 485
498int 486int
@@ -722,14 +710,15 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
722{ 710{
723 struct inode *inode = dp->d_inode; 711 struct inode *inode = dp->d_inode;
724 int (*fsync) (struct file *, struct dentry *, int); 712 int (*fsync) (struct file *, struct dentry *, int);
725 int err = nfs_ok; 713 int err;
726 714
727 filemap_fdatawrite(inode->i_mapping); 715 err = filemap_fdatawrite(inode->i_mapping);
728 if (fop && (fsync = fop->fsync)) 716 if (err == 0 && fop && (fsync = fop->fsync))
729 err=fsync(filp, dp, 0); 717 err = fsync(filp, dp, 0);
730 filemap_fdatawait(inode->i_mapping); 718 if (err == 0)
719 err = filemap_fdatawait(inode->i_mapping);
731 720
732 return nfserrno(err); 721 return err;
733} 722}
734 723
735 724
@@ -739,17 +728,17 @@ nfsd_sync(struct file *filp)
739 int err; 728 int err;
740 struct inode *inode = filp->f_dentry->d_inode; 729 struct inode *inode = filp->f_dentry->d_inode;
741 dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); 730 dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
742 down(&inode->i_sem); 731 mutex_lock(&inode->i_mutex);
743 err=nfsd_dosync(filp, filp->f_dentry, filp->f_op); 732 err=nfsd_dosync(filp, filp->f_dentry, filp->f_op);
744 up(&inode->i_sem); 733 mutex_unlock(&inode->i_mutex);
745 734
746 return err; 735 return err;
747} 736}
748 737
749void 738int
750nfsd_sync_dir(struct dentry *dp) 739nfsd_sync_dir(struct dentry *dp)
751{ 740{
752 nfsd_dosync(NULL, dp, dp->d_inode->i_fop); 741 return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
753} 742}
754 743
755/* 744/*
@@ -826,7 +815,7 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
826 return size; 815 return size;
827} 816}
828 817
829static inline int 818static int
830nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 819nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
831 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 820 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
832{ 821{
@@ -885,12 +874,12 @@ static void kill_suid(struct dentry *dentry)
885 struct iattr ia; 874 struct iattr ia;
886 ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; 875 ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
887 876
888 down(&dentry->d_inode->i_sem); 877 mutex_lock(&dentry->d_inode->i_mutex);
889 notify_change(dentry, &ia); 878 notify_change(dentry, &ia);
890 up(&dentry->d_inode->i_sem); 879 mutex_unlock(&dentry->d_inode->i_mutex);
891} 880}
892 881
893static inline int 882static int
894nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 883nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
895 loff_t offset, struct kvec *vec, int vlen, 884 loff_t offset, struct kvec *vec, int vlen,
896 unsigned long cnt, int *stablep) 885 unsigned long cnt, int *stablep)
@@ -902,9 +891,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
902 int err = 0; 891 int err = 0;
903 int stable = *stablep; 892 int stable = *stablep;
904 893
894#ifdef MSNFS
905 err = nfserr_perm; 895 err = nfserr_perm;
906 896
907#ifdef MSNFS
908 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 897 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
909 (!lock_may_write(file->f_dentry->d_inode, offset, cnt))) 898 (!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
910 goto out; 899 goto out;
@@ -1076,7 +1065,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1076 return err; 1065 return err;
1077 if (EX_ISSYNC(fhp->fh_export)) { 1066 if (EX_ISSYNC(fhp->fh_export)) {
1078 if (file->f_op && file->f_op->fsync) { 1067 if (file->f_op && file->f_op->fsync) {
1079 err = nfsd_sync(file); 1068 err = nfserrno(nfsd_sync(file));
1080 } else { 1069 } else {
1081 err = nfserr_notsupp; 1070 err = nfserr_notsupp;
1082 } 1071 }
@@ -1144,7 +1133,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1144 "nfsd_create: parent %s/%s not locked!\n", 1133 "nfsd_create: parent %s/%s not locked!\n",
1145 dentry->d_parent->d_name.name, 1134 dentry->d_parent->d_name.name,
1146 dentry->d_name.name); 1135 dentry->d_name.name);
1147 err = -EIO; 1136 err = nfserr_io;
1148 goto out; 1137 goto out;
1149 } 1138 }
1150 } 1139 }
@@ -1187,7 +1176,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1187 goto out_nfserr; 1176 goto out_nfserr;
1188 1177
1189 if (EX_ISSYNC(fhp->fh_export)) { 1178 if (EX_ISSYNC(fhp->fh_export)) {
1190 nfsd_sync_dir(dentry); 1179 err = nfserrno(nfsd_sync_dir(dentry));
1191 write_inode_now(dchild->d_inode, 1); 1180 write_inode_now(dchild->d_inode, 1);
1192 } 1181 }
1193 1182
@@ -1197,9 +1186,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1197 * send along the gid when it tries to implement setgid 1186 * send along the gid when it tries to implement setgid
1198 * directories via NFS. 1187 * directories via NFS.
1199 */ 1188 */
1200 err = 0; 1189 if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) {
1201 if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) 1190 int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1202 err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1191 if (err2)
1192 err = err2;
1193 }
1203 /* 1194 /*
1204 * Update the file handle to get the new inode info. 1195 * Update the file handle to get the new inode info.
1205 */ 1196 */
@@ -1318,17 +1309,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1318 goto out_nfserr; 1309 goto out_nfserr;
1319 1310
1320 if (EX_ISSYNC(fhp->fh_export)) { 1311 if (EX_ISSYNC(fhp->fh_export)) {
1321 nfsd_sync_dir(dentry); 1312 err = nfserrno(nfsd_sync_dir(dentry));
1322 /* setattr will sync the child (or not) */ 1313 /* setattr will sync the child (or not) */
1323 } 1314 }
1324 1315
1325 /*
1326 * Update the filehandle to get the new inode info.
1327 */
1328 err = fh_update(resfhp);
1329 if (err)
1330 goto out;
1331
1332 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1316 if (createmode == NFS3_CREATE_EXCLUSIVE) {
1333 /* Cram the verifier into atime/mtime/mode */ 1317 /* Cram the verifier into atime/mtime/mode */
1334 iap->ia_valid = ATTR_MTIME|ATTR_ATIME 1318 iap->ia_valid = ATTR_MTIME|ATTR_ATIME
@@ -1349,8 +1333,17 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1349 * implement setgid directories via NFS. Clear out all that cruft. 1333 * implement setgid directories via NFS. Clear out all that cruft.
1350 */ 1334 */
1351 set_attr: 1335 set_attr:
1352 if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) 1336 if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) {
1353 err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1337 int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1338 if (err2)
1339 err = err2;
1340 }
1341
1342 /*
1343 * Update the filehandle to get the new inode info.
1344 */
1345 if (!err)
1346 err = fh_update(resfhp);
1354 1347
1355 out: 1348 out:
1356 fh_unlock(fhp); 1349 fh_unlock(fhp);
@@ -1459,10 +1452,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1459 } else 1452 } else
1460 err = vfs_symlink(dentry->d_inode, dnew, path, mode); 1453 err = vfs_symlink(dentry->d_inode, dnew, path, mode);
1461 1454
1462 if (!err) { 1455 if (!err)
1463 if (EX_ISSYNC(fhp->fh_export)) 1456 if (EX_ISSYNC(fhp->fh_export))
1464 nfsd_sync_dir(dentry); 1457 err = nfsd_sync_dir(dentry);
1465 } else 1458 if (err)
1466 err = nfserrno(err); 1459 err = nfserrno(err);
1467 fh_unlock(fhp); 1460 fh_unlock(fhp);
1468 1461
@@ -1518,7 +1511,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1518 err = vfs_link(dold, dirp, dnew); 1511 err = vfs_link(dold, dirp, dnew);
1519 if (!err) { 1512 if (!err) {
1520 if (EX_ISSYNC(ffhp->fh_export)) { 1513 if (EX_ISSYNC(ffhp->fh_export)) {
1521 nfsd_sync_dir(ddir); 1514 err = nfserrno(nfsd_sync_dir(ddir));
1522 write_inode_now(dest, 1); 1515 write_inode_now(dest, 1);
1523 } 1516 }
1524 } else { 1517 } else {
@@ -1602,13 +1595,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1602 if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1595 if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1603 ((atomic_read(&odentry->d_count) > 1) 1596 ((atomic_read(&odentry->d_count) > 1)
1604 || (atomic_read(&ndentry->d_count) > 1))) { 1597 || (atomic_read(&ndentry->d_count) > 1))) {
1605 err = nfserr_perm; 1598 err = -EPERM;
1606 } else 1599 } else
1607#endif 1600#endif
1608 err = vfs_rename(fdir, odentry, tdir, ndentry); 1601 err = vfs_rename(fdir, odentry, tdir, ndentry);
1609 if (!err && EX_ISSYNC(tfhp->fh_export)) { 1602 if (!err && EX_ISSYNC(tfhp->fh_export)) {
1610 nfsd_sync_dir(tdentry); 1603 err = nfsd_sync_dir(tdentry);
1611 nfsd_sync_dir(fdentry); 1604 if (!err)
1605 err = nfsd_sync_dir(fdentry);
1612 } 1606 }
1613 1607
1614 out_dput_new: 1608 out_dput_new:
@@ -1673,7 +1667,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1673#ifdef MSNFS 1667#ifdef MSNFS
1674 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1668 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1675 (atomic_read(&rdentry->d_count) > 1)) { 1669 (atomic_read(&rdentry->d_count) > 1)) {
1676 err = nfserr_perm; 1670 err = -EPERM;
1677 } else 1671 } else
1678#endif 1672#endif
1679 err = vfs_unlink(dirp, rdentry); 1673 err = vfs_unlink(dirp, rdentry);
@@ -1683,17 +1677,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1683 1677
1684 dput(rdentry); 1678 dput(rdentry);
1685 1679
1686 if (err) 1680 if (err == 0 &&
1687 goto out_nfserr; 1681 EX_ISSYNC(fhp->fh_export))
1688 if (EX_ISSYNC(fhp->fh_export)) 1682 err = nfsd_sync_dir(dentry);
1689 nfsd_sync_dir(dentry);
1690
1691out:
1692 return err;
1693 1683
1694out_nfserr: 1684out_nfserr:
1695 err = nfserrno(err); 1685 err = nfserrno(err);
1696 goto out; 1686out:
1687 return err;
1697} 1688}
1698 1689
1699/* 1690/*
@@ -1884,39 +1875,25 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
1884 ssize_t size; 1875 ssize_t size;
1885 struct posix_acl *acl; 1876 struct posix_acl *acl;
1886 1877
1887 if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr) 1878 if (!IS_POSIXACL(inode))
1879 return ERR_PTR(-EOPNOTSUPP);
1880
1881 switch (type) {
1882 case ACL_TYPE_ACCESS:
1883 name = POSIX_ACL_XATTR_ACCESS;
1884 break;
1885 case ACL_TYPE_DEFAULT:
1886 name = POSIX_ACL_XATTR_DEFAULT;
1887 break;
1888 default:
1888 return ERR_PTR(-EOPNOTSUPP); 1889 return ERR_PTR(-EOPNOTSUPP);
1889 switch(type) {
1890 case ACL_TYPE_ACCESS:
1891 name = POSIX_ACL_XATTR_ACCESS;
1892 break;
1893 case ACL_TYPE_DEFAULT:
1894 name = POSIX_ACL_XATTR_DEFAULT;
1895 break;
1896 default:
1897 return ERR_PTR(-EOPNOTSUPP);
1898 } 1890 }
1899 1891
1900 size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0); 1892 size = nfsd_getxattr(fhp->fh_dentry, name, &value);
1893 if (size < 0)
1894 return ERR_PTR(size);
1901 1895
1902 if (size < 0) {
1903 acl = ERR_PTR(size);
1904 goto getout;
1905 } else if (size > 0) {
1906 value = kmalloc(size, GFP_KERNEL);
1907 if (!value) {
1908 acl = ERR_PTR(-ENOMEM);
1909 goto getout;
1910 }
1911 size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size);
1912 if (size < 0) {
1913 acl = ERR_PTR(size);
1914 goto getout;
1915 }
1916 }
1917 acl = posix_acl_from_xattr(value, size); 1896 acl = posix_acl_from_xattr(value, size);
1918
1919getout:
1920 kfree(value); 1897 kfree(value);
1921 return acl; 1898 return acl;
1922} 1899}
@@ -1957,16 +1934,13 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
1957 } else 1934 } else
1958 size = 0; 1935 size = 0;
1959 1936
1960 if (!fhp->fh_locked)
1961 fh_lock(fhp); /* unlocking is done automatically */
1962 if (size) 1937 if (size)
1963 error = inode->i_op->setxattr(fhp->fh_dentry, name, 1938 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
1964 value, size, 0);
1965 else { 1939 else {
1966 if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) 1940 if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
1967 error = 0; 1941 error = 0;
1968 else { 1942 else {
1969 error = inode->i_op->removexattr(fhp->fh_dentry, name); 1943 error = vfs_removexattr(fhp->fh_dentry, name);
1970 if (error == -ENODATA) 1944 if (error == -ENODATA)
1971 error = 0; 1945 error = 0;
1972 } 1946 }
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index eda056bac256..9480a0526cd3 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1532,7 +1532,7 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
1532 * NOTE to self: No changes in the attribute list are required to move from 1532 * NOTE to self: No changes in the attribute list are required to move from
1533 * a resident to a non-resident attribute. 1533 * a resident to a non-resident attribute.
1534 * 1534 *
1535 * Locking: - The caller must hold i_sem on the inode. 1535 * Locking: - The caller must hold i_mutex on the inode.
1536 */ 1536 */
1537int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) 1537int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
1538{ 1538{
@@ -1728,7 +1728,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
1728 /* 1728 /*
1729 * This needs to be last since the address space operations ->readpage 1729 * This needs to be last since the address space operations ->readpage
1730 * and ->writepage can run concurrently with us as they are not 1730 * and ->writepage can run concurrently with us as they are not
1731 * serialized on i_sem. Note, we are not allowed to fail once we flip 1731 * serialized on i_mutex. Note, we are not allowed to fail once we flip
1732 * this switch, which is another reason to do this last. 1732 * this switch, which is another reason to do this last.
1733 */ 1733 */
1734 NInoSetNonResident(ni); 1734 NInoSetNonResident(ni);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 795c3d1930f5..b0690d4c8906 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -69,7 +69,7 @@ ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
69 * work but we don't care for how quickly one can access them. This also fixes 69 * work but we don't care for how quickly one can access them. This also fixes
70 * the dcache aliasing issues. 70 * the dcache aliasing issues.
71 * 71 *
72 * Locking: - Caller must hold i_sem on the directory. 72 * Locking: - Caller must hold i_mutex on the directory.
73 * - Each page cache page in the index allocation mapping must be 73 * - Each page cache page in the index allocation mapping must be
74 * locked whilst being accessed otherwise we may find a corrupt 74 * locked whilst being accessed otherwise we may find a corrupt
75 * page due to it being under ->writepage at the moment which 75 * page due to it being under ->writepage at the moment which
@@ -1085,11 +1085,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
1085 * While this will return the names in random order this doesn't matter for 1085 * While this will return the names in random order this doesn't matter for
1086 * ->readdir but OTOH results in a faster ->readdir. 1086 * ->readdir but OTOH results in a faster ->readdir.
1087 * 1087 *
1088 * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS 1088 * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
1089 * parts (e.g. ->f_pos and ->i_size, and it also protects against directory 1089 * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
1090 * modifications). 1090 * modifications).
1091 * 1091 *
1092 * Locking: - Caller must hold i_sem on the directory. 1092 * Locking: - Caller must hold i_mutex on the directory.
1093 * - Each page cache page in the index allocation mapping must be 1093 * - Each page cache page in the index allocation mapping must be
1094 * locked whilst being accessed otherwise we may find a corrupt 1094 * locked whilst being accessed otherwise we may find a corrupt
1095 * page due to it being under ->writepage at the moment which 1095 * page due to it being under ->writepage at the moment which
@@ -1520,7 +1520,7 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
1520 * Note: In the past @filp could be NULL so we ignore it as we don't need it 1520 * Note: In the past @filp could be NULL so we ignore it as we don't need it
1521 * anyway. 1521 * anyway.
1522 * 1522 *
1523 * Locking: Caller must hold i_sem on the inode. 1523 * Locking: Caller must hold i_mutex on the inode.
1524 * 1524 *
1525 * TODO: We should probably also write all attribute/index inodes associated 1525 * TODO: We should probably also write all attribute/index inodes associated
1526 * with this inode but since we have no simple way of getting to them we ignore 1526 * with this inode but since we have no simple way of getting to them we ignore
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 727533891813..fb413d3d8618 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -106,7 +106,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
106 * this is the case, the necessary zeroing will also have happened and that all 106 * this is the case, the necessary zeroing will also have happened and that all
107 * metadata is self-consistent. 107 * metadata is self-consistent.
108 * 108 *
109 * Locking: i_sem on the vfs inode corrseponsind to the ntfs inode @ni must be 109 * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be
110 * held by the caller. 110 * held by the caller.
111 */ 111 */
112static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size, 112static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size,
@@ -473,7 +473,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
473 * @bytes: number of bytes to be written 473 * @bytes: number of bytes to be written
474 * 474 *
475 * This is called for non-resident attributes from ntfs_file_buffered_write() 475 * This is called for non-resident attributes from ntfs_file_buffered_write()
476 * with i_sem held on the inode (@pages[0]->mapping->host). There are 476 * with i_mutex held on the inode (@pages[0]->mapping->host). There are
477 * @nr_pages pages in @pages which are locked but not kmap()ped. The source 477 * @nr_pages pages in @pages which are locked but not kmap()ped. The source
478 * data has not yet been copied into the @pages. 478 * data has not yet been copied into the @pages.
479 * 479 *
@@ -1637,7 +1637,7 @@ err_out:
1637 * @pos: byte position in file at which the write begins 1637 * @pos: byte position in file at which the write begins
1638 * @bytes: number of bytes to be written 1638 * @bytes: number of bytes to be written
1639 * 1639 *
1640 * This is called from ntfs_file_buffered_write() with i_sem held on the inode 1640 * This is called from ntfs_file_buffered_write() with i_mutex held on the inode
1641 * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are 1641 * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are
1642 * locked but not kmap()ped. The source data has already been copied into the 1642 * locked but not kmap()ped. The source data has already been copied into the
1643 * @page. ntfs_prepare_pages_for_non_resident_write() has been called before 1643 * @page. ntfs_prepare_pages_for_non_resident_write() has been called before
@@ -1814,7 +1814,7 @@ err_out:
1814/** 1814/**
1815 * ntfs_file_buffered_write - 1815 * ntfs_file_buffered_write -
1816 * 1816 *
1817 * Locking: The vfs is holding ->i_sem on the inode. 1817 * Locking: The vfs is holding ->i_mutex on the inode.
1818 */ 1818 */
1819static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, 1819static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1820 const struct iovec *iov, unsigned long nr_segs, 1820 const struct iovec *iov, unsigned long nr_segs,
@@ -2173,7 +2173,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2173 err = remove_suid(file->f_dentry); 2173 err = remove_suid(file->f_dentry);
2174 if (err) 2174 if (err)
2175 goto out; 2175 goto out;
2176 inode_update_time(inode, 1); 2176 file_update_time(file);
2177 written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, 2177 written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
2178 count); 2178 count);
2179out: 2179out:
@@ -2196,9 +2196,9 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf,
2196 2196
2197 BUG_ON(iocb->ki_pos != pos); 2197 BUG_ON(iocb->ki_pos != pos);
2198 2198
2199 down(&inode->i_sem); 2199 mutex_lock(&inode->i_mutex);
2200 ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); 2200 ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
2201 up(&inode->i_sem); 2201 mutex_unlock(&inode->i_mutex);
2202 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2202 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2203 int err = sync_page_range(inode, mapping, pos, ret); 2203 int err = sync_page_range(inode, mapping, pos, ret);
2204 if (err < 0) 2204 if (err < 0)
@@ -2221,12 +2221,12 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
2221 struct kiocb kiocb; 2221 struct kiocb kiocb;
2222 ssize_t ret; 2222 ssize_t ret;
2223 2223
2224 down(&inode->i_sem); 2224 mutex_lock(&inode->i_mutex);
2225 init_sync_kiocb(&kiocb, file); 2225 init_sync_kiocb(&kiocb, file);
2226 ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); 2226 ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
2227 if (ret == -EIOCBQUEUED) 2227 if (ret == -EIOCBQUEUED)
2228 ret = wait_on_sync_kiocb(&kiocb); 2228 ret = wait_on_sync_kiocb(&kiocb);
2229 up(&inode->i_sem); 2229 mutex_unlock(&inode->i_mutex);
2230 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2230 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2231 int err = sync_page_range(inode, mapping, *ppos - ret, ret); 2231 int err = sync_page_range(inode, mapping, *ppos - ret, ret);
2232 if (err < 0) 2232 if (err < 0)
@@ -2269,7 +2269,7 @@ static ssize_t ntfs_file_write(struct file *file, const char __user *buf,
2269 * Note: In the past @filp could be NULL so we ignore it as we don't need it 2269 * Note: In the past @filp could be NULL so we ignore it as we don't need it
2270 * anyway. 2270 * anyway.
2271 * 2271 *
2272 * Locking: Caller must hold i_sem on the inode. 2272 * Locking: Caller must hold i_mutex on the inode.
2273 * 2273 *
2274 * TODO: We should probably also write all attribute/index inodes associated 2274 * TODO: We should probably also write all attribute/index inodes associated
2275 * with this inode but since we have no simple way of getting to them we ignore 2275 * with this inode but since we have no simple way of getting to them we ignore
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 8f2d5727546f..9f5427c2d105 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -32,7 +32,7 @@
32 * Allocate a new index context, initialize it with @idx_ni and return it. 32 * Allocate a new index context, initialize it with @idx_ni and return it.
33 * Return NULL if allocation failed. 33 * Return NULL if allocation failed.
34 * 34 *
35 * Locking: Caller must hold i_sem on the index inode. 35 * Locking: Caller must hold i_mutex on the index inode.
36 */ 36 */
37ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) 37ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
38{ 38{
@@ -50,7 +50,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
50 * 50 *
51 * Release the index context @ictx, releasing all associated resources. 51 * Release the index context @ictx, releasing all associated resources.
52 * 52 *
53 * Locking: Caller must hold i_sem on the index inode. 53 * Locking: Caller must hold i_mutex on the index inode.
54 */ 54 */
55void ntfs_index_ctx_put(ntfs_index_context *ictx) 55void ntfs_index_ctx_put(ntfs_index_context *ictx)
56{ 56{
@@ -106,7 +106,7 @@ void ntfs_index_ctx_put(ntfs_index_context *ictx)
106 * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to 106 * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
107 * ensure that the changes are written to disk. 107 * ensure that the changes are written to disk.
108 * 108 *
109 * Locking: - Caller must hold i_sem on the index inode. 109 * Locking: - Caller must hold i_mutex on the index inode.
110 * - Each page cache page in the index allocation mapping must be 110 * - Each page cache page in the index allocation mapping must be
111 * locked whilst being accessed otherwise we may find a corrupt 111 * locked whilst being accessed otherwise we may find a corrupt
112 * page due to it being under ->writepage at the moment which 112 * page due to it being under ->writepage at the moment which
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index b24f4c4b2c5c..ea1bd3feea1b 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2125,13 +2125,13 @@ void ntfs_put_inode(struct inode *vi)
2125 ntfs_inode *ni = NTFS_I(vi); 2125 ntfs_inode *ni = NTFS_I(vi);
2126 if (NInoIndexAllocPresent(ni)) { 2126 if (NInoIndexAllocPresent(ni)) {
2127 struct inode *bvi = NULL; 2127 struct inode *bvi = NULL;
2128 down(&vi->i_sem); 2128 mutex_lock(&vi->i_mutex);
2129 if (atomic_read(&vi->i_count) == 2) { 2129 if (atomic_read(&vi->i_count) == 2) {
2130 bvi = ni->itype.index.bmp_ino; 2130 bvi = ni->itype.index.bmp_ino;
2131 if (bvi) 2131 if (bvi)
2132 ni->itype.index.bmp_ino = NULL; 2132 ni->itype.index.bmp_ino = NULL;
2133 } 2133 }
2134 up(&vi->i_sem); 2134 mutex_unlock(&vi->i_mutex);
2135 if (bvi) 2135 if (bvi)
2136 iput(bvi); 2136 iput(bvi);
2137 } 2137 }
@@ -2311,7 +2311,7 @@ static const char *es = " Leaving inconsistent metadata. Unmount and run "
2311 * 2311 *
2312 * Returns 0 on success or -errno on error. 2312 * Returns 0 on success or -errno on error.
2313 * 2313 *
2314 * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for 2314 * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for
2315 * writing. The only case in the kernel where ->i_alloc_sem is not held is 2315 * writing. The only case in the kernel where ->i_alloc_sem is not held is
2316 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called 2316 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
2317 * with the current i_size as the offset. The analogous place in NTFS is in 2317 * with the current i_size as the offset. The analogous place in NTFS is in
@@ -2767,7 +2767,25 @@ unm_done:
2767 up_write(&ni->runlist.lock); 2767 up_write(&ni->runlist.lock);
2768done: 2768done:
2769 /* Update the mtime and ctime on the base inode. */ 2769 /* Update the mtime and ctime on the base inode. */
2770 inode_update_time(VFS_I(base_ni), 1); 2770 /* normally ->truncate shouldn't update ctime or mtime,
2771 * but ntfs did before so it got a copy & paste version
2772 * of file_update_time. one day someone should fix this
2773 * for real.
2774 */
2775 if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
2776 struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb);
2777 int sync_it = 0;
2778
2779 if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) ||
2780 !timespec_equal(&VFS_I(base_ni)->i_ctime, &now))
2781 sync_it = 1;
2782 VFS_I(base_ni)->i_mtime = now;
2783 VFS_I(base_ni)->i_ctime = now;
2784
2785 if (sync_it)
2786 mark_inode_dirty_sync(VFS_I(base_ni));
2787 }
2788
2771 if (likely(!err)) { 2789 if (likely(!err)) {
2772 NInoClearTruncateFailed(ni); 2790 NInoClearTruncateFailed(ni);
2773 ntfs_debug("Done."); 2791 ntfs_debug("Done.");
@@ -2831,7 +2849,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
2831 * We also abort all changes of user, group, and mode as we do not implement 2849 * We also abort all changes of user, group, and mode as we do not implement
2832 * the NTFS ACLs yet. 2850 * the NTFS ACLs yet.
2833 * 2851 *
2834 * Called with ->i_sem held. For the ATTR_SIZE (i.e. ->truncate) case, also 2852 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also
2835 * called with ->i_alloc_sem held for writing. 2853 * called with ->i_alloc_sem held for writing.
2836 * 2854 *
2837 * Basically this is a copy of generic notify_change() and inode_setattr() 2855 * Basically this is a copy of generic notify_change() and inode_setattr()
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 351dbc3b6e40..5ea9eb93af62 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -96,7 +96,7 @@
96 * name. We then convert the name to the current NLS code page, and proceed 96 * name. We then convert the name to the current NLS code page, and proceed
97 * searching for a dentry with this name, etc, as in case 2), above. 97 * searching for a dentry with this name, etc, as in case 2), above.
98 * 98 *
99 * Locking: Caller must hold i_sem on the directory. 99 * Locking: Caller must hold i_mutex on the directory.
100 */ 100 */
101static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, 101static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
102 struct nameidata *nd) 102 struct nameidata *nd)
@@ -254,7 +254,7 @@ handle_name:
254 nls_name.hash = full_name_hash(nls_name.name, nls_name.len); 254 nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
255 255
256 /* 256 /*
257 * Note: No need for dent->d_lock lock as i_sem is held on the 257 * Note: No need for dent->d_lock lock as i_mutex is held on the
258 * parent inode. 258 * parent inode.
259 */ 259 */
260 260
@@ -374,7 +374,7 @@ struct inode_operations ntfs_dir_inode_ops = {
374 * The code is based on the ext3 ->get_parent() implementation found in 374 * The code is based on the ext3 ->get_parent() implementation found in
375 * fs/ext3/namei.c::ext3_get_parent(). 375 * fs/ext3/namei.c::ext3_get_parent().
376 * 376 *
377 * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down. 377 * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down.
378 * 378 *
379 * Return the dentry of the parent directory on success or the error code on 379 * Return the dentry of the parent directory on success or the error code on
380 * error (IS_ERR() is true). 380 * error (IS_ERR() is true).
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c
index 833df2a4e9fb..d0ef4182147b 100644
--- a/fs/ntfs/quota.c
+++ b/fs/ntfs/quota.c
@@ -48,7 +48,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
48 ntfs_error(vol->sb, "Quota inodes are not open."); 48 ntfs_error(vol->sb, "Quota inodes are not open.");
49 return FALSE; 49 return FALSE;
50 } 50 }
51 down(&vol->quota_q_ino->i_sem); 51 mutex_lock(&vol->quota_q_ino->i_mutex);
52 ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino)); 52 ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
53 if (!ictx) { 53 if (!ictx) {
54 ntfs_error(vol->sb, "Failed to get index context."); 54 ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
98 ntfs_index_entry_mark_dirty(ictx); 98 ntfs_index_entry_mark_dirty(ictx);
99set_done: 99set_done:
100 ntfs_index_ctx_put(ictx); 100 ntfs_index_ctx_put(ictx);
101 up(&vol->quota_q_ino->i_sem); 101 mutex_unlock(&vol->quota_q_ino->i_mutex);
102 /* 102 /*
103 * We set the flag so we do not try to mark the quotas out of date 103 * We set the flag so we do not try to mark the quotas out of date
104 * again on remount. 104 * again on remount.
@@ -110,7 +110,7 @@ done:
110err_out: 110err_out:
111 if (ictx) 111 if (ictx)
112 ntfs_index_ctx_put(ictx); 112 ntfs_index_ctx_put(ictx);
113 up(&vol->quota_q_ino->i_sem); 113 mutex_unlock(&vol->quota_q_ino->i_mutex);
114 return FALSE; 114 return FALSE;
115} 115}
116 116
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 6c16db9e1a8a..c3a3f1a8310b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -443,8 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
443 443
444 ntfs_debug("Entering with remount options string: %s", opt); 444 ntfs_debug("Entering with remount options string: %s", opt);
445#ifndef NTFS_RW 445#ifndef NTFS_RW
446 /* For read-only compiled driver, enforce all read-only flags. */ 446 /* For read-only compiled driver, enforce read-only flag. */
447 *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 447 *flags |= MS_RDONLY;
448#else /* NTFS_RW */ 448#else /* NTFS_RW */
449 /* 449 /*
450 * For the read-write compiled driver, if we are remounting read-write, 450 * For the read-write compiled driver, if we are remounting read-write,
@@ -1213,10 +1213,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1213 * Find the inode number for the hibernation file by looking up the 1213 * Find the inode number for the hibernation file by looking up the
1214 * filename hiberfil.sys in the root directory. 1214 * filename hiberfil.sys in the root directory.
1215 */ 1215 */
1216 down(&vol->root_ino->i_sem); 1216 mutex_lock(&vol->root_ino->i_mutex);
1217 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12, 1217 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
1218 &name); 1218 &name);
1219 up(&vol->root_ino->i_sem); 1219 mutex_unlock(&vol->root_ino->i_mutex);
1220 if (IS_ERR_MREF(mref)) { 1220 if (IS_ERR_MREF(mref)) {
1221 ret = MREF_ERR(mref); 1221 ret = MREF_ERR(mref);
1222 /* If the file does not exist, Windows is not hibernated. */ 1222 /* If the file does not exist, Windows is not hibernated. */
@@ -1307,10 +1307,10 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
1307 * Find the inode number for the quota file by looking up the filename 1307 * Find the inode number for the quota file by looking up the filename
1308 * $Quota in the extended system files directory $Extend. 1308 * $Quota in the extended system files directory $Extend.
1309 */ 1309 */
1310 down(&vol->extend_ino->i_sem); 1310 mutex_lock(&vol->extend_ino->i_mutex);
1311 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6, 1311 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
1312 &name); 1312 &name);
1313 up(&vol->extend_ino->i_sem); 1313 mutex_unlock(&vol->extend_ino->i_mutex);
1314 if (IS_ERR_MREF(mref)) { 1314 if (IS_ERR_MREF(mref)) {
1315 /* 1315 /*
1316 * If the file does not exist, quotas are disabled and have 1316 * If the file does not exist, quotas are disabled and have
@@ -1390,10 +1390,10 @@ static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
1390 * Find the inode number for the transaction log file by looking up the 1390 * Find the inode number for the transaction log file by looking up the
1391 * filename $UsnJrnl in the extended system files directory $Extend. 1391 * filename $UsnJrnl in the extended system files directory $Extend.
1392 */ 1392 */
1393 down(&vol->extend_ino->i_sem); 1393 mutex_lock(&vol->extend_ino->i_mutex);
1394 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8, 1394 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
1395 &name); 1395 &name);
1396 up(&vol->extend_ino->i_sem); 1396 mutex_unlock(&vol->extend_ino->i_mutex);
1397 if (IS_ERR_MREF(mref)) { 1397 if (IS_ERR_MREF(mref)) {
1398 /* 1398 /*
1399 * If the file does not exist, transaction logging is disabled, 1399 * If the file does not exist, transaction logging is disabled,
@@ -1721,7 +1721,7 @@ static BOOL load_system_files(ntfs_volume *vol)
1721 es3); 1721 es3);
1722 goto iput_mirr_err_out; 1722 goto iput_mirr_err_out;
1723 } 1723 }
1724 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1724 sb->s_flags |= MS_RDONLY;
1725 ntfs_error(sb, "%s. Mounting read-only%s", 1725 ntfs_error(sb, "%s. Mounting read-only%s",
1726 !vol->mftmirr_ino ? es1 : es2, es3); 1726 !vol->mftmirr_ino ? es1 : es2, es3);
1727 } else 1727 } else
@@ -1837,7 +1837,7 @@ get_ctx_vol_failed:
1837 es1, es2); 1837 es1, es2);
1838 goto iput_vol_err_out; 1838 goto iput_vol_err_out;
1839 } 1839 }
1840 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1840 sb->s_flags |= MS_RDONLY;
1841 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1841 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1842 } else 1842 } else
1843 ntfs_warning(sb, "%s. Will not be able to remount " 1843 ntfs_warning(sb, "%s. Will not be able to remount "
@@ -1874,7 +1874,7 @@ get_ctx_vol_failed:
1874 } 1874 }
1875 goto iput_logfile_err_out; 1875 goto iput_logfile_err_out;
1876 } 1876 }
1877 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1877 sb->s_flags |= MS_RDONLY;
1878 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1878 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1879 } else 1879 } else
1880 ntfs_warning(sb, "%s. Will not be able to remount " 1880 ntfs_warning(sb, "%s. Will not be able to remount "
@@ -1919,7 +1919,7 @@ get_ctx_vol_failed:
1919 es1, es2); 1919 es1, es2);
1920 goto iput_root_err_out; 1920 goto iput_root_err_out;
1921 } 1921 }
1922 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1922 sb->s_flags |= MS_RDONLY;
1923 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1923 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1924 } else 1924 } else
1925 ntfs_warning(sb, "%s. Will not be able to remount " 1925 ntfs_warning(sb, "%s. Will not be able to remount "
@@ -1943,7 +1943,7 @@ get_ctx_vol_failed:
1943 goto iput_root_err_out; 1943 goto iput_root_err_out;
1944 } 1944 }
1945 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1945 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1946 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1946 sb->s_flags |= MS_RDONLY;
1947 /* 1947 /*
1948 * Do not set NVolErrors() because ntfs_remount() might manage 1948 * Do not set NVolErrors() because ntfs_remount() might manage
1949 * to set the dirty flag in which case all would be well. 1949 * to set the dirty flag in which case all would be well.
@@ -1970,7 +1970,7 @@ get_ctx_vol_failed:
1970 goto iput_root_err_out; 1970 goto iput_root_err_out;
1971 } 1971 }
1972 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1972 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1973 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1973 sb->s_flags |= MS_RDONLY;
1974 NVolSetErrors(vol); 1974 NVolSetErrors(vol);
1975 } 1975 }
1976#endif 1976#endif
@@ -1989,7 +1989,7 @@ get_ctx_vol_failed:
1989 goto iput_root_err_out; 1989 goto iput_root_err_out;
1990 } 1990 }
1991 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1991 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1992 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1992 sb->s_flags |= MS_RDONLY;
1993 NVolSetErrors(vol); 1993 NVolSetErrors(vol);
1994 } 1994 }
1995#endif /* NTFS_RW */ 1995#endif /* NTFS_RW */
@@ -2030,7 +2030,7 @@ get_ctx_vol_failed:
2030 es1, es2); 2030 es1, es2);
2031 goto iput_quota_err_out; 2031 goto iput_quota_err_out;
2032 } 2032 }
2033 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 2033 sb->s_flags |= MS_RDONLY;
2034 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 2034 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2035 } else 2035 } else
2036 ntfs_warning(sb, "%s. Will not be able to remount " 2036 ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2053,7 +2053,7 @@ get_ctx_vol_failed:
2053 goto iput_quota_err_out; 2053 goto iput_quota_err_out;
2054 } 2054 }
2055 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 2055 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2056 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 2056 sb->s_flags |= MS_RDONLY;
2057 NVolSetErrors(vol); 2057 NVolSetErrors(vol);
2058 } 2058 }
2059 /* 2059 /*
@@ -2074,7 +2074,7 @@ get_ctx_vol_failed:
2074 es1, es2); 2074 es1, es2);
2075 goto iput_usnjrnl_err_out; 2075 goto iput_usnjrnl_err_out;
2076 } 2076 }
2077 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 2077 sb->s_flags |= MS_RDONLY;
2078 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 2078 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2079 } else 2079 } else
2080 ntfs_warning(sb, "%s. Will not be able to remount " 2080 ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2097,7 +2097,7 @@ get_ctx_vol_failed:
2097 goto iput_usnjrnl_err_out; 2097 goto iput_usnjrnl_err_out;
2098 } 2098 }
2099 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 2099 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2100 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 2100 sb->s_flags |= MS_RDONLY;
2101 NVolSetErrors(vol); 2101 NVolSetErrors(vol);
2102 } 2102 }
2103#endif /* NTFS_RW */ 2103#endif /* NTFS_RW */
@@ -2312,9 +2312,9 @@ static void ntfs_put_super(struct super_block *sb)
2312 if (!list_empty(&sb->s_dirty)) { 2312 if (!list_empty(&sb->s_dirty)) {
2313 const char *s1, *s2; 2313 const char *s1, *s2;
2314 2314
2315 down(&vol->mft_ino->i_sem); 2315 mutex_lock(&vol->mft_ino->i_mutex);
2316 truncate_inode_pages(vol->mft_ino->i_mapping, 0); 2316 truncate_inode_pages(vol->mft_ino->i_mapping, 0);
2317 up(&vol->mft_ino->i_sem); 2317 mutex_unlock(&vol->mft_ino->i_mutex);
2318 write_inode_now(vol->mft_ino, 1); 2318 write_inode_now(vol->mft_ino, 1);
2319 if (!list_empty(&sb->s_dirty)) { 2319 if (!list_empty(&sb->s_dirty)) {
2320 static const char *_s1 = "inodes"; 2320 static const char *_s1 = "inodes";
@@ -2689,7 +2689,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2689 2689
2690 ntfs_debug("Entering."); 2690 ntfs_debug("Entering.");
2691#ifndef NTFS_RW 2691#ifndef NTFS_RW
2692 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 2692 sb->s_flags |= MS_RDONLY;
2693#endif /* ! NTFS_RW */ 2693#endif /* ! NTFS_RW */
2694 /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */ 2694 /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */
2695 sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); 2695 sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 465f797451ee..6b9812db3779 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -966,7 +966,7 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
966 mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk, 966 mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk,
967 num_clusters); 967 num_clusters);
968 968
969 BUG_ON(!down_trylock(&tl_inode->i_sem)); 969 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
970 970
971 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 971 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
972 972
@@ -1108,7 +1108,7 @@ bail:
1108 return status; 1108 return status;
1109} 1109}
1110 1110
1111/* Expects you to already be holding tl_inode->i_sem */ 1111/* Expects you to already be holding tl_inode->i_mutex */
1112static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) 1112static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
1113{ 1113{
1114 int status; 1114 int status;
@@ -1123,7 +1123,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
1123 1123
1124 mlog_entry_void(); 1124 mlog_entry_void();
1125 1125
1126 BUG_ON(!down_trylock(&tl_inode->i_sem)); 1126 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
1127 1127
1128 di = (struct ocfs2_dinode *) tl_bh->b_data; 1128 di = (struct ocfs2_dinode *) tl_bh->b_data;
1129 tl = &di->id2.i_dealloc; 1129 tl = &di->id2.i_dealloc;
@@ -1198,9 +1198,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
1198 int status; 1198 int status;
1199 struct inode *tl_inode = osb->osb_tl_inode; 1199 struct inode *tl_inode = osb->osb_tl_inode;
1200 1200
1201 down(&tl_inode->i_sem); 1201 mutex_lock(&tl_inode->i_mutex);
1202 status = __ocfs2_flush_truncate_log(osb); 1202 status = __ocfs2_flush_truncate_log(osb);
1203 up(&tl_inode->i_sem); 1203 mutex_unlock(&tl_inode->i_mutex);
1204 1204
1205 return status; 1205 return status;
1206} 1206}
@@ -1363,7 +1363,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
1363 mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs, 1363 mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs,
1364 tl_copy->i_blkno); 1364 tl_copy->i_blkno);
1365 1365
1366 down(&tl_inode->i_sem); 1366 mutex_lock(&tl_inode->i_mutex);
1367 for(i = 0; i < num_recs; i++) { 1367 for(i = 0; i < num_recs; i++) {
1368 if (ocfs2_truncate_log_needs_flush(osb)) { 1368 if (ocfs2_truncate_log_needs_flush(osb)) {
1369 status = __ocfs2_flush_truncate_log(osb); 1369 status = __ocfs2_flush_truncate_log(osb);
@@ -1395,7 +1395,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
1395 } 1395 }
1396 1396
1397bail_up: 1397bail_up:
1398 up(&tl_inode->i_sem); 1398 mutex_unlock(&tl_inode->i_mutex);
1399 1399
1400 mlog_exit(status); 1400 mlog_exit(status);
1401 return status; 1401 return status;
@@ -1840,7 +1840,7 @@ start:
1840 1840
1841 mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del); 1841 mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);
1842 1842
1843 down(&tl_inode->i_sem); 1843 mutex_lock(&tl_inode->i_mutex);
1844 tl_sem = 1; 1844 tl_sem = 1;
1845 /* ocfs2_truncate_log_needs_flush guarantees us at least one 1845 /* ocfs2_truncate_log_needs_flush guarantees us at least one
1846 * record is free for use. If there isn't any, we flush to get 1846 * record is free for use. If there isn't any, we flush to get
@@ -1875,7 +1875,7 @@ start:
1875 goto bail; 1875 goto bail;
1876 } 1876 }
1877 1877
1878 up(&tl_inode->i_sem); 1878 mutex_unlock(&tl_inode->i_mutex);
1879 tl_sem = 0; 1879 tl_sem = 0;
1880 1880
1881 ocfs2_commit_trans(handle); 1881 ocfs2_commit_trans(handle);
@@ -1890,7 +1890,7 @@ bail:
1890 ocfs2_schedule_truncate_log_flush(osb, 1); 1890 ocfs2_schedule_truncate_log_flush(osb, 1);
1891 1891
1892 if (tl_sem) 1892 if (tl_sem)
1893 up(&tl_inode->i_sem); 1893 mutex_unlock(&tl_inode->i_mutex);
1894 1894
1895 if (handle) 1895 if (handle)
1896 ocfs2_commit_trans(handle); 1896 ocfs2_commit_trans(handle);
@@ -1994,7 +1994,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
1994 goto bail; 1994 goto bail;
1995 } 1995 }
1996 1996
1997 down(&ext_alloc_inode->i_sem); 1997 mutex_lock(&ext_alloc_inode->i_mutex);
1998 (*tc)->tc_ext_alloc_inode = ext_alloc_inode; 1998 (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
1999 1999
2000 status = ocfs2_meta_lock(ext_alloc_inode, 2000 status = ocfs2_meta_lock(ext_alloc_inode,
@@ -2026,7 +2026,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
2026 if (tc->tc_ext_alloc_locked) 2026 if (tc->tc_ext_alloc_locked)
2027 ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); 2027 ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
2028 2028
2029 up(&tc->tc_ext_alloc_inode->i_sem); 2029 mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
2030 iput(tc->tc_ext_alloc_inode); 2030 iput(tc->tc_ext_alloc_inode);
2031 } 2031 }
2032 2032
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index f5ef5ea61a05..e8c56a3d9c64 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
212 mlog(ML_ENTRY, "ENTRY:\n"); \ 212 mlog(ML_ENTRY, "ENTRY:\n"); \
213} while (0) 213} while (0)
214 214
215/* We disable this for old compilers since they don't have support for 215/*
216 * __builtin_types_compatible_p. 216 * We disable this for sparse.
217 */ 217 */
218#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \ 218#if !defined(__CHECKER__)
219 !defined(__CHECKER__)
220#define mlog_exit(st) do { \ 219#define mlog_exit(st) do { \
221 if (__builtin_types_compatible_p(typeof(st), unsigned long)) \ 220 if (__builtin_types_compatible_p(typeof(st), unsigned long)) \
222 mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \ 221 mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 5fd60c105913..cf7828f23361 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -653,7 +653,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
653 struct config_group *o2hb_group = NULL, *ret = NULL; 653 struct config_group *o2hb_group = NULL, *ret = NULL;
654 void *defs = NULL; 654 void *defs = NULL;
655 655
656 /* this runs under the parent dir's i_sem; there can be only 656 /* this runs under the parent dir's i_mutex; there can be only
657 * one caller in here at a time */ 657 * one caller in here at a time */
658 if (o2nm_single_cluster) 658 if (o2nm_single_cluster)
659 goto out; /* ENOSPC */ 659 goto out; /* ENOSPC */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 856e20ae8263..57158fa75d91 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -202,7 +202,7 @@ bail:
202} 202}
203 203
204/* 204/*
205 * NOTE: this should always be called with parent dir i_sem taken. 205 * NOTE: this should always be called with parent dir i_mutex taken.
206 */ 206 */
207int ocfs2_find_files_on_disk(const char *name, 207int ocfs2_find_files_on_disk(const char *name,
208 int namelen, 208 int namelen,
@@ -245,7 +245,7 @@ leave:
245 * Return 0 if the name does not exist 245 * Return 0 if the name does not exist
246 * Return -EEXIST if the directory contains the name 246 * Return -EEXIST if the directory contains the name
247 * 247 *
248 * Callers should have i_sem + a cluster lock on dir 248 * Callers should have i_mutex + a cluster lock on dir
249 */ 249 */
250int ocfs2_check_dir_for_entry(struct inode *dir, 250int ocfs2_check_dir_for_entry(struct inode *dir,
251 const char *name, 251 const char *name,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 72ae9e3306f4..eaf33caa0a1f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -23,6 +23,7 @@
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 */ 24 */
25 25
26#include <linux/capability.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27#include <linux/types.h> 28#include <linux/types.h>
28#include <linux/slab.h> 29#include <linux/slab.h>
@@ -492,7 +493,7 @@ restart_all:
492 } 493 }
493 494
494 /* blocks people in read/write from reading our allocation 495 /* blocks people in read/write from reading our allocation
495 * until we're done changing it. We depend on i_sem to block 496 * until we're done changing it. We depend on i_mutex to block
496 * other extend/truncate calls while we're here. Ordering wrt 497 * other extend/truncate calls while we're here. Ordering wrt
497 * start_trans is important here -- always do it before! */ 498 * start_trans is important here -- always do it before! */
498 down_write(&OCFS2_I(inode)->ip_alloc_sem); 499 down_write(&OCFS2_I(inode)->ip_alloc_sem);
@@ -958,8 +959,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
958 filp->f_flags &= ~O_DIRECT; 959 filp->f_flags &= ~O_DIRECT;
959#endif 960#endif
960 961
961 down(&inode->i_sem); 962 mutex_lock(&inode->i_mutex);
962 /* to match setattr's i_sem -> i_alloc_sem -> rw_lock ordering */ 963 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
963 if (filp->f_flags & O_DIRECT) { 964 if (filp->f_flags & O_DIRECT) {
964 have_alloc_sem = 1; 965 have_alloc_sem = 1;
965 down_read(&inode->i_alloc_sem); 966 down_read(&inode->i_alloc_sem);
@@ -1123,7 +1124,7 @@ out:
1123 up_read(&inode->i_alloc_sem); 1124 up_read(&inode->i_alloc_sem);
1124 if (rw_level != -1) 1125 if (rw_level != -1)
1125 ocfs2_rw_unlock(inode, rw_level); 1126 ocfs2_rw_unlock(inode, rw_level);
1126 up(&inode->i_sem); 1127 mutex_unlock(&inode->i_mutex);
1127 1128
1128 mlog_exit(ret); 1129 mlog_exit(ret);
1129 return ret; 1130 return ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index a91ba4dec936..d4ecc0627716 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -485,10 +485,10 @@ static int ocfs2_remove_inode(struct inode *inode,
485 goto bail; 485 goto bail;
486 } 486 }
487 487
488 down(&inode_alloc_inode->i_sem); 488 mutex_lock(&inode_alloc_inode->i_mutex);
489 status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1); 489 status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1);
490 if (status < 0) { 490 if (status < 0) {
491 up(&inode_alloc_inode->i_sem); 491 mutex_unlock(&inode_alloc_inode->i_mutex);
492 492
493 mlog_errno(status); 493 mlog_errno(status);
494 goto bail; 494 goto bail;
@@ -536,7 +536,7 @@ bail_commit:
536 ocfs2_commit_trans(handle); 536 ocfs2_commit_trans(handle);
537bail_unlock: 537bail_unlock:
538 ocfs2_meta_unlock(inode_alloc_inode, 1); 538 ocfs2_meta_unlock(inode_alloc_inode, 1);
539 up(&inode_alloc_inode->i_sem); 539 mutex_unlock(&inode_alloc_inode->i_mutex);
540 brelse(inode_alloc_bh); 540 brelse(inode_alloc_bh);
541bail: 541bail:
542 iput(inode_alloc_inode); 542 iput(inode_alloc_inode);
@@ -567,10 +567,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
567 /* Lock the orphan dir. The lock will be held for the entire 567 /* Lock the orphan dir. The lock will be held for the entire
568 * delete_inode operation. We do this now to avoid races with 568 * delete_inode operation. We do this now to avoid races with
569 * recovery completion on other nodes. */ 569 * recovery completion on other nodes. */
570 down(&orphan_dir_inode->i_sem); 570 mutex_lock(&orphan_dir_inode->i_mutex);
571 status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1); 571 status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1);
572 if (status < 0) { 572 if (status < 0) {
573 up(&orphan_dir_inode->i_sem); 573 mutex_unlock(&orphan_dir_inode->i_mutex);
574 574
575 mlog_errno(status); 575 mlog_errno(status);
576 goto bail; 576 goto bail;
@@ -593,7 +593,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
593 593
594bail_unlock_dir: 594bail_unlock_dir:
595 ocfs2_meta_unlock(orphan_dir_inode, 1); 595 ocfs2_meta_unlock(orphan_dir_inode, 1);
596 up(&orphan_dir_inode->i_sem); 596 mutex_unlock(&orphan_dir_inode->i_mutex);
597 brelse(orphan_dir_bh); 597 brelse(orphan_dir_bh);
598bail: 598bail:
599 iput(orphan_dir_inode); 599 iput(orphan_dir_inode);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 04428042e5e5..303c8d96457f 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -216,7 +216,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle,
216 atomic_inc(&inode->i_count); 216 atomic_inc(&inode->i_count);
217 217
218 /* we're obviously changing it... */ 218 /* we're obviously changing it... */
219 down(&inode->i_sem); 219 mutex_lock(&inode->i_mutex);
220 220
221 /* sanity check */ 221 /* sanity check */
222 BUG_ON(OCFS2_I(inode)->ip_handle); 222 BUG_ON(OCFS2_I(inode)->ip_handle);
@@ -241,7 +241,7 @@ static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle)
241 OCFS2_I(inode)->ip_handle = NULL; 241 OCFS2_I(inode)->ip_handle = NULL;
242 list_del_init(&OCFS2_I(inode)->ip_handle_list); 242 list_del_init(&OCFS2_I(inode)->ip_handle_list);
243 243
244 up(&inode->i_sem); 244 mutex_unlock(&inode->i_mutex);
245 iput(inode); 245 iput(inode);
246 } 246 }
247} 247}
@@ -1433,10 +1433,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
1433 goto out; 1433 goto out;
1434 } 1434 }
1435 1435
1436 down(&orphan_dir_inode->i_sem); 1436 mutex_lock(&orphan_dir_inode->i_mutex);
1437 status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); 1437 status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0);
1438 if (status < 0) { 1438 if (status < 0) {
1439 up(&orphan_dir_inode->i_sem); 1439 mutex_unlock(&orphan_dir_inode->i_mutex);
1440 mlog_errno(status); 1440 mlog_errno(status);
1441 goto out; 1441 goto out;
1442 } 1442 }
@@ -1451,7 +1451,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
1451 if (!bh) 1451 if (!bh)
1452 status = -EINVAL; 1452 status = -EINVAL;
1453 if (status < 0) { 1453 if (status < 0) {
1454 up(&orphan_dir_inode->i_sem); 1454 mutex_unlock(&orphan_dir_inode->i_mutex);
1455 if (bh) 1455 if (bh)
1456 brelse(bh); 1456 brelse(bh);
1457 mlog_errno(status); 1457 mlog_errno(status);
@@ -1465,7 +1465,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
1465 1465
1466 if (!ocfs2_check_dir_entry(orphan_dir_inode, 1466 if (!ocfs2_check_dir_entry(orphan_dir_inode,
1467 de, bh, local)) { 1467 de, bh, local)) {
1468 up(&orphan_dir_inode->i_sem); 1468 mutex_unlock(&orphan_dir_inode->i_mutex);
1469 status = -EINVAL; 1469 status = -EINVAL;
1470 mlog_errno(status); 1470 mlog_errno(status);
1471 brelse(bh); 1471 brelse(bh);
@@ -1509,7 +1509,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
1509 } 1509 }
1510 brelse(bh); 1510 brelse(bh);
1511 } 1511 }
1512 up(&orphan_dir_inode->i_sem); 1512 mutex_unlock(&orphan_dir_inode->i_mutex);
1513 1513
1514 ocfs2_meta_unlock(orphan_dir_inode, 0); 1514 ocfs2_meta_unlock(orphan_dir_inode, 0);
1515 have_disk_lock = 0; 1515 have_disk_lock = 0;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index fe373a2101d9..149b35181666 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -334,7 +334,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
334 goto bail; 334 goto bail;
335 } 335 }
336 336
337 down(&inode->i_sem); 337 mutex_lock(&inode->i_mutex);
338 338
339 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, 339 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno,
340 &alloc_bh, 0, inode); 340 &alloc_bh, 0, inode);
@@ -367,7 +367,7 @@ bail:
367 brelse(alloc_bh); 367 brelse(alloc_bh);
368 368
369 if (inode) { 369 if (inode) {
370 up(&inode->i_sem); 370 mutex_unlock(&inode->i_mutex);
371 iput(inode); 371 iput(inode);
372 } 372 }
373 373
@@ -446,7 +446,7 @@ bail:
446 446
447/* 447/*
448 * make sure we've got at least bitswanted contiguous bits in the 448 * make sure we've got at least bitswanted contiguous bits in the
449 * local alloc. You lose them when you drop i_sem. 449 * local alloc. You lose them when you drop i_mutex.
450 * 450 *
451 * We will add ourselves to the transaction passed in, but may start 451 * We will add ourselves to the transaction passed in, but may start
452 * our own in order to shift windows. 452 * our own in order to shift windows.
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index afdeec4b0eef..843cf9ddefe8 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -80,12 +80,8 @@ static struct vm_operations_struct ocfs2_file_vm_ops = {
80 .nopage = ocfs2_nopage, 80 .nopage = ocfs2_nopage,
81}; 81};
82 82
83int ocfs2_mmap(struct file *file, 83int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
84 struct vm_area_struct *vma)
85{ 84{
86 struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
87 struct inode *inode = mapping->host;
88
89 /* We don't want to support shared writable mappings yet. */ 85 /* We don't want to support shared writable mappings yet. */
90 if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) 86 if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
91 && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { 87 && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
@@ -95,7 +91,7 @@ int ocfs2_mmap(struct file *file,
95 return -EINVAL; 91 return -EINVAL;
96 } 92 }
97 93
98 update_atime(inode); 94 file_accessed(file);
99 vma->vm_ops = &ocfs2_file_vm_ops; 95 vma->vm_ops = &ocfs2_file_vm_ops;
100 return 0; 96 return 0;
101} 97}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 48bf7f0ce544..364d64bd5f10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -169,7 +169,7 @@ static match_table_t tokens = {
169 */ 169 */
170static void ocfs2_write_super(struct super_block *sb) 170static void ocfs2_write_super(struct super_block *sb)
171{ 171{
172 if (down_trylock(&sb->s_lock) == 0) 172 if (mutex_trylock(&sb->s_lock) != 0)
173 BUG(); 173 BUG();
174 sb->s_dirt = 0; 174 sb->s_dirt = 0;
175} 175}
diff --git a/fs/open.c b/fs/open.c
index f53a5b9ffb7d..70e0230d8e77 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -16,9 +16,11 @@
16#include <linux/tty.h> 16#include <linux/tty.h>
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/backing-dev.h> 18#include <linux/backing-dev.h>
19#include <linux/capability.h>
19#include <linux/security.h> 20#include <linux/security.h>
20#include <linux/mount.h> 21#include <linux/mount.h>
21#include <linux/vfs.h> 22#include <linux/vfs.h>
23#include <linux/fcntl.h>
22#include <asm/uaccess.h> 24#include <asm/uaccess.h>
23#include <linux/fs.h> 25#include <linux/fs.h>
24#include <linux/personality.h> 26#include <linux/personality.h>
@@ -194,7 +196,8 @@ out:
194 return error; 196 return error;
195} 197}
196 198
197int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) 199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
200 struct file *filp)
198{ 201{
199 int err; 202 int err;
200 struct iattr newattrs; 203 struct iattr newattrs;
@@ -204,19 +207,19 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
204 return -EINVAL; 207 return -EINVAL;
205 208
206 newattrs.ia_size = length; 209 newattrs.ia_size = length;
207 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 210 newattrs.ia_valid = ATTR_SIZE | time_attrs;
208 if (filp) { 211 if (filp) {
209 newattrs.ia_file = filp; 212 newattrs.ia_file = filp;
210 newattrs.ia_valid |= ATTR_FILE; 213 newattrs.ia_valid |= ATTR_FILE;
211 } 214 }
212 215
213 down(&dentry->d_inode->i_sem); 216 mutex_lock(&dentry->d_inode->i_mutex);
214 err = notify_change(dentry, &newattrs); 217 err = notify_change(dentry, &newattrs);
215 up(&dentry->d_inode->i_sem); 218 mutex_unlock(&dentry->d_inode->i_mutex);
216 return err; 219 return err;
217} 220}
218 221
219static inline long do_sys_truncate(const char __user * path, loff_t length) 222static long do_sys_truncate(const char __user * path, loff_t length)
220{ 223{
221 struct nameidata nd; 224 struct nameidata nd;
222 struct inode * inode; 225 struct inode * inode;
@@ -266,7 +269,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length)
266 error = locks_verify_truncate(inode, NULL, length); 269 error = locks_verify_truncate(inode, NULL, length);
267 if (!error) { 270 if (!error) {
268 DQUOT_INIT(inode); 271 DQUOT_INIT(inode);
269 error = do_truncate(nd.dentry, length, NULL); 272 error = do_truncate(nd.dentry, length, 0, NULL);
270 } 273 }
271 put_write_access(inode); 274 put_write_access(inode);
272 275
@@ -282,7 +285,7 @@ asmlinkage long sys_truncate(const char __user * path, unsigned long length)
282 return do_sys_truncate(path, (long)length); 285 return do_sys_truncate(path, (long)length);
283} 286}
284 287
285static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 288static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
286{ 289{
287 struct inode * inode; 290 struct inode * inode;
288 struct dentry *dentry; 291 struct dentry *dentry;
@@ -318,7 +321,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
318 321
319 error = locks_verify_truncate(inode, file, length); 322 error = locks_verify_truncate(inode, file, length);
320 if (!error) 323 if (!error)
321 error = do_truncate(dentry, length, file); 324 error = do_truncate(dentry, length, 0, file);
322out_putf: 325out_putf:
323 fput(file); 326 fput(file);
324out: 327out:
@@ -381,7 +384,7 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
381 384
382 error = get_user(newattrs.ia_atime.tv_sec, &times->actime); 385 error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
383 newattrs.ia_atime.tv_nsec = 0; 386 newattrs.ia_atime.tv_nsec = 0;
384 if (!error) 387 if (!error)
385 error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime); 388 error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
386 newattrs.ia_mtime.tv_nsec = 0; 389 newattrs.ia_mtime.tv_nsec = 0;
387 if (error) 390 if (error)
@@ -397,9 +400,9 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
397 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 400 (error = vfs_permission(&nd, MAY_WRITE)) != 0)
398 goto dput_and_out; 401 goto dput_and_out;
399 } 402 }
400 down(&inode->i_sem); 403 mutex_lock(&inode->i_mutex);
401 error = notify_change(nd.dentry, &newattrs); 404 error = notify_change(nd.dentry, &newattrs);
402 up(&inode->i_sem); 405 mutex_unlock(&inode->i_mutex);
403dput_and_out: 406dput_and_out:
404 path_release(&nd); 407 path_release(&nd);
405out: 408out:
@@ -412,14 +415,14 @@ out:
412 * must be owner or have write permission. 415 * must be owner or have write permission.
413 * Else, update from *times, must be owner or super user. 416 * Else, update from *times, must be owner or super user.
414 */ 417 */
415long do_utimes(char __user * filename, struct timeval * times) 418long do_utimes(int dfd, char __user *filename, struct timeval *times)
416{ 419{
417 int error; 420 int error;
418 struct nameidata nd; 421 struct nameidata nd;
419 struct inode * inode; 422 struct inode * inode;
420 struct iattr newattrs; 423 struct iattr newattrs;
421 424
422 error = user_path_walk(filename, &nd); 425 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
423 426
424 if (error) 427 if (error)
425 goto out; 428 goto out;
@@ -450,22 +453,27 @@ long do_utimes(char __user * filename, struct timeval * times)
450 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 453 (error = vfs_permission(&nd, MAY_WRITE)) != 0)
451 goto dput_and_out; 454 goto dput_and_out;
452 } 455 }
453 down(&inode->i_sem); 456 mutex_lock(&inode->i_mutex);
454 error = notify_change(nd.dentry, &newattrs); 457 error = notify_change(nd.dentry, &newattrs);
455 up(&inode->i_sem); 458 mutex_unlock(&inode->i_mutex);
456dput_and_out: 459dput_and_out:
457 path_release(&nd); 460 path_release(&nd);
458out: 461out:
459 return error; 462 return error;
460} 463}
461 464
462asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes) 465asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes)
463{ 466{
464 struct timeval times[2]; 467 struct timeval times[2];
465 468
466 if (utimes && copy_from_user(&times, utimes, sizeof(times))) 469 if (utimes && copy_from_user(&times, utimes, sizeof(times)))
467 return -EFAULT; 470 return -EFAULT;
468 return do_utimes(filename, utimes ? times : NULL); 471 return do_utimes(dfd, filename, utimes ? times : NULL);
472}
473
474asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes)
475{
476 return sys_futimesat(AT_FDCWD, filename, utimes);
469} 477}
470 478
471 479
@@ -474,7 +482,7 @@ asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utime
474 * We do this by temporarily clearing all FS-related capabilities and 482 * We do this by temporarily clearing all FS-related capabilities and
475 * switching the fsuid/fsgid around to the real ones. 483 * switching the fsuid/fsgid around to the real ones.
476 */ 484 */
477asmlinkage long sys_access(const char __user * filename, int mode) 485asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
478{ 486{
479 struct nameidata nd; 487 struct nameidata nd;
480 int old_fsuid, old_fsgid; 488 int old_fsuid, old_fsgid;
@@ -504,7 +512,7 @@ asmlinkage long sys_access(const char __user * filename, int mode)
504 else 512 else
505 current->cap_effective = current->cap_permitted; 513 current->cap_effective = current->cap_permitted;
506 514
507 res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 515 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
508 if (!res) { 516 if (!res) {
509 res = vfs_permission(&nd, mode); 517 res = vfs_permission(&nd, mode);
510 /* SuS v2 requires we report a read only fs too */ 518 /* SuS v2 requires we report a read only fs too */
@@ -521,6 +529,11 @@ asmlinkage long sys_access(const char __user * filename, int mode)
521 return res; 529 return res;
522} 530}
523 531
532asmlinkage long sys_access(const char __user *filename, int mode)
533{
534 return sys_faccessat(AT_FDCWD, filename, mode);
535}
536
524asmlinkage long sys_chdir(const char __user * filename) 537asmlinkage long sys_chdir(const char __user * filename)
525{ 538{
526 struct nameidata nd; 539 struct nameidata nd;
@@ -619,13 +632,13 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
619 err = -EPERM; 632 err = -EPERM;
620 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 633 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
621 goto out_putf; 634 goto out_putf;
622 down(&inode->i_sem); 635 mutex_lock(&inode->i_mutex);
623 if (mode == (mode_t) -1) 636 if (mode == (mode_t) -1)
624 mode = inode->i_mode; 637 mode = inode->i_mode;
625 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 638 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
626 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 639 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
627 err = notify_change(dentry, &newattrs); 640 err = notify_change(dentry, &newattrs);
628 up(&inode->i_sem); 641 mutex_unlock(&inode->i_mutex);
629 642
630out_putf: 643out_putf:
631 fput(file); 644 fput(file);
@@ -633,14 +646,15 @@ out:
633 return err; 646 return err;
634} 647}
635 648
636asmlinkage long sys_chmod(const char __user * filename, mode_t mode) 649asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
650 mode_t mode)
637{ 651{
638 struct nameidata nd; 652 struct nameidata nd;
639 struct inode * inode; 653 struct inode * inode;
640 int error; 654 int error;
641 struct iattr newattrs; 655 struct iattr newattrs;
642 656
643 error = user_path_walk(filename, &nd); 657 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
644 if (error) 658 if (error)
645 goto out; 659 goto out;
646 inode = nd.dentry->d_inode; 660 inode = nd.dentry->d_inode;
@@ -653,13 +667,13 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
653 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 667 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
654 goto dput_and_out; 668 goto dput_and_out;
655 669
656 down(&inode->i_sem); 670 mutex_lock(&inode->i_mutex);
657 if (mode == (mode_t) -1) 671 if (mode == (mode_t) -1)
658 mode = inode->i_mode; 672 mode = inode->i_mode;
659 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 673 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
660 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 674 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
661 error = notify_change(nd.dentry, &newattrs); 675 error = notify_change(nd.dentry, &newattrs);
662 up(&inode->i_sem); 676 mutex_unlock(&inode->i_mutex);
663 677
664dput_and_out: 678dput_and_out:
665 path_release(&nd); 679 path_release(&nd);
@@ -667,6 +681,11 @@ out:
667 return error; 681 return error;
668} 682}
669 683
684asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
685{
686 return sys_fchmodat(AT_FDCWD, filename, mode);
687}
688
670static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 689static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
671{ 690{
672 struct inode * inode; 691 struct inode * inode;
@@ -695,9 +714,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
695 } 714 }
696 if (!S_ISDIR(inode->i_mode)) 715 if (!S_ISDIR(inode->i_mode))
697 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; 716 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
698 down(&inode->i_sem); 717 mutex_lock(&inode->i_mutex);
699 error = notify_change(dentry, &newattrs); 718 error = notify_change(dentry, &newattrs);
700 up(&inode->i_sem); 719 mutex_unlock(&inode->i_mutex);
701out: 720out:
702 return error; 721 return error;
703} 722}
@@ -715,6 +734,26 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
715 return error; 734 return error;
716} 735}
717 736
737asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
738 gid_t group, int flag)
739{
740 struct nameidata nd;
741 int error = -EINVAL;
742 int follow;
743
744 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
745 goto out;
746
747 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
748 error = __user_walk_fd(dfd, filename, follow, &nd);
749 if (!error) {
750 error = chown_common(nd.dentry, user, group);
751 path_release(&nd);
752 }
753out:
754 return error;
755}
756
718asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 757asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
719{ 758{
720 struct nameidata nd; 759 struct nameidata nd;
@@ -818,7 +857,8 @@ cleanup_file:
818 * for the internal routines (ie open_namei()/follow_link() etc). 00 is 857 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
819 * used by symlinks. 858 * used by symlinks.
820 */ 859 */
821struct file *filp_open(const char * filename, int flags, int mode) 860static struct file *do_filp_open(int dfd, const char *filename, int flags,
861 int mode)
822{ 862{
823 int namei_flags, error; 863 int namei_flags, error;
824 struct nameidata nd; 864 struct nameidata nd;
@@ -827,12 +867,17 @@ struct file *filp_open(const char * filename, int flags, int mode)
827 if ((namei_flags+1) & O_ACCMODE) 867 if ((namei_flags+1) & O_ACCMODE)
828 namei_flags++; 868 namei_flags++;
829 869
830 error = open_namei(filename, namei_flags, mode, &nd); 870 error = open_namei(dfd, filename, namei_flags, mode, &nd);
831 if (!error) 871 if (!error)
832 return nameidata_to_filp(&nd, flags); 872 return nameidata_to_filp(&nd, flags);
833 873
834 return ERR_PTR(error); 874 return ERR_PTR(error);
835} 875}
876
877struct file *filp_open(const char *filename, int flags, int mode)
878{
879 return do_filp_open(AT_FDCWD, filename, flags, mode);
880}
836EXPORT_SYMBOL(filp_open); 881EXPORT_SYMBOL(filp_open);
837 882
838/** 883/**
@@ -970,7 +1015,7 @@ out:
970 1015
971EXPORT_SYMBOL(get_unused_fd); 1016EXPORT_SYMBOL(get_unused_fd);
972 1017
973static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) 1018static void __put_unused_fd(struct files_struct *files, unsigned int fd)
974{ 1019{
975 struct fdtable *fdt = files_fdtable(files); 1020 struct fdtable *fdt = files_fdtable(files);
976 __FD_CLR(fd, fdt->open_fds); 1021 __FD_CLR(fd, fdt->open_fds);
@@ -989,7 +1034,7 @@ void fastcall put_unused_fd(unsigned int fd)
989EXPORT_SYMBOL(put_unused_fd); 1034EXPORT_SYMBOL(put_unused_fd);
990 1035
991/* 1036/*
992 * Install a file pointer in the fd array. 1037 * Install a file pointer in the fd array.
993 * 1038 *
994 * The VFS is full of places where we drop the files lock between 1039 * The VFS is full of places where we drop the files lock between
995 * setting the open_fds bitmap and installing the file in the file 1040 * setting the open_fds bitmap and installing the file in the file
@@ -1014,7 +1059,7 @@ void fastcall fd_install(unsigned int fd, struct file * file)
1014 1059
1015EXPORT_SYMBOL(fd_install); 1060EXPORT_SYMBOL(fd_install);
1016 1061
1017long do_sys_open(const char __user *filename, int flags, int mode) 1062long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
1018{ 1063{
1019 char *tmp = getname(filename); 1064 char *tmp = getname(filename);
1020 int fd = PTR_ERR(tmp); 1065 int fd = PTR_ERR(tmp);
@@ -1022,7 +1067,7 @@ long do_sys_open(const char __user *filename, int flags, int mode)
1022 if (!IS_ERR(tmp)) { 1067 if (!IS_ERR(tmp)) {
1023 fd = get_unused_fd(); 1068 fd = get_unused_fd();
1024 if (fd >= 0) { 1069 if (fd >= 0) {
1025 struct file *f = filp_open(tmp, flags, mode); 1070 struct file *f = do_filp_open(dfd, tmp, flags, mode);
1026 if (IS_ERR(f)) { 1071 if (IS_ERR(f)) {
1027 put_unused_fd(fd); 1072 put_unused_fd(fd);
1028 fd = PTR_ERR(f); 1073 fd = PTR_ERR(f);
@@ -1041,10 +1086,20 @@ asmlinkage long sys_open(const char __user *filename, int flags, int mode)
1041 if (force_o_largefile()) 1086 if (force_o_largefile())
1042 flags |= O_LARGEFILE; 1087 flags |= O_LARGEFILE;
1043 1088
1044 return do_sys_open(filename, flags, mode); 1089 return do_sys_open(AT_FDCWD, filename, flags, mode);
1045} 1090}
1046EXPORT_SYMBOL_GPL(sys_open); 1091EXPORT_SYMBOL_GPL(sys_open);
1047 1092
1093asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
1094 int mode)
1095{
1096 if (force_o_largefile())
1097 flags |= O_LARGEFILE;
1098
1099 return do_sys_open(dfd, filename, flags, mode);
1100}
1101EXPORT_SYMBOL_GPL(sys_openat);
1102
1048#ifndef __alpha__ 1103#ifndef __alpha__
1049 1104
1050/* 1105/*
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index e227a04261ab..c9a478099281 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -21,26 +21,30 @@ config ACORN_PARTITION
21 Support hard disks partitioned under Acorn operating systems. 21 Support hard disks partitioned under Acorn operating systems.
22 22
23config ACORN_PARTITION_CUMANA 23config ACORN_PARTITION_CUMANA
24 bool "Cumana partition support" if PARTITION_ADVANCED && ACORN_PARTITION 24 bool "Cumana partition support" if PARTITION_ADVANCED
25 default y if ARCH_ACORN 25 default y if ARCH_ACORN
26 depends on ACORN_PARTITION
26 help 27 help
27 Say Y here if you would like to use hard disks under Linux which 28 Say Y here if you would like to use hard disks under Linux which
28 were partitioned using the Cumana interface on Acorn machines. 29 were partitioned using the Cumana interface on Acorn machines.
29 30
30config ACORN_PARTITION_EESOX 31config ACORN_PARTITION_EESOX
31 bool "EESOX partition support" if PARTITION_ADVANCED && ACORN_PARTITION 32 bool "EESOX partition support" if PARTITION_ADVANCED
32 default y if ARCH_ACORN 33 default y if ARCH_ACORN
34 depends on ACORN_PARTITION
33 35
34config ACORN_PARTITION_ICS 36config ACORN_PARTITION_ICS
35 bool "ICS partition support" if PARTITION_ADVANCED && ACORN_PARTITION 37 bool "ICS partition support" if PARTITION_ADVANCED
36 default y if ARCH_ACORN 38 default y if ARCH_ACORN
39 depends on ACORN_PARTITION
37 help 40 help
38 Say Y here if you would like to use hard disks under Linux which 41 Say Y here if you would like to use hard disks under Linux which
39 were partitioned using the ICS interface on Acorn machines. 42 were partitioned using the ICS interface on Acorn machines.
40 43
41config ACORN_PARTITION_ADFS 44config ACORN_PARTITION_ADFS
42 bool "Native filecore partition support" if PARTITION_ADVANCED && ACORN_PARTITION 45 bool "Native filecore partition support" if PARTITION_ADVANCED
43 default y if ARCH_ACORN 46 default y if ARCH_ACORN
47 depends on ACORN_PARTITION
44 help 48 help
45 The Acorn Disc Filing System is the standard file system of the 49 The Acorn Disc Filing System is the standard file system of the
46 RiscOS operating system which runs on Acorn's ARM-based Risc PC 50 RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -48,15 +52,17 @@ config ACORN_PARTITION_ADFS
48 `Y' here, Linux will support disk partitions created under ADFS. 52 `Y' here, Linux will support disk partitions created under ADFS.
49 53
50config ACORN_PARTITION_POWERTEC 54config ACORN_PARTITION_POWERTEC
51 bool "PowerTec partition support" if PARTITION_ADVANCED && ACORN_PARTITION 55 bool "PowerTec partition support" if PARTITION_ADVANCED
52 default y if ARCH_ACORN 56 default y if ARCH_ACORN
57 depends on ACORN_PARTITION
53 help 58 help
54 Support reading partition tables created on Acorn machines using 59 Support reading partition tables created on Acorn machines using
55 the PowerTec SCSI drive. 60 the PowerTec SCSI drive.
56 61
57config ACORN_PARTITION_RISCIX 62config ACORN_PARTITION_RISCIX
58 bool "RISCiX partition support" if PARTITION_ADVANCED && ACORN_PARTITION 63 bool "RISCiX partition support" if PARTITION_ADVANCED
59 default y if ARCH_ACORN 64 default y if ARCH_ACORN
65 depends on ACORN_PARTITION
60 help 66 help
61 Once upon a time, there was a native Unix port for the Acorn series 67 Once upon a time, there was a native Unix port for the Acorn series
62 of machines called RISCiX. If you say 'Y' here, Linux will be able 68 of machines called RISCiX. If you say 'Y' here, Linux will be able
@@ -216,6 +222,13 @@ config SUN_PARTITION
216 given by the tar program ("man tar" or preferably "info tar"). If 222 given by the tar program ("man tar" or preferably "info tar"). If
217 you don't know what all this is about, say N. 223 you don't know what all this is about, say N.
218 224
225config KARMA_PARTITION
226 bool "Karma Partition support"
227 depends on PARTITION_ADVANCED
228 help
229 Say Y here if you would like to mount the Rio Karma MP3 player, as it
230 uses a proprietary partition table.
231
219config EFI_PARTITION 232config EFI_PARTITION
220 bool "EFI GUID Partition support" 233 bool "EFI GUID Partition support"
221 depends on PARTITION_ADVANCED 234 depends on PARTITION_ADVANCED
@@ -224,5 +237,3 @@ config EFI_PARTITION
224 Say Y here if you would like to use hard disks under Linux which 237 Say Y here if you would like to use hard disks under Linux which
225 were partitioned using EFI GPT. Presently only useful on the 238 were partitioned using EFI GPT. Presently only useful on the
226 IA-64 platform. 239 IA-64 platform.
227
228# define_bool CONFIG_ACORN_PARTITION_CUMANA y
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index 66d5cc26fafb..42c7d3878ed0 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_SUN_PARTITION) += sun.o
17obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o 17obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
18obj-$(CONFIG_IBM_PARTITION) += ibm.o 18obj-$(CONFIG_IBM_PARTITION) += ibm.o
19obj-$(CONFIG_EFI_PARTITION) += efi.o 19obj-$(CONFIG_EFI_PARTITION) += efi.o
20obj-$(CONFIG_KARMA_PARTITION) += karma.o
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7881ce05daef..f924f459bdb8 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -35,6 +35,7 @@
35#include "ibm.h" 35#include "ibm.h"
36#include "ultrix.h" 36#include "ultrix.h"
37#include "efi.h" 37#include "efi.h"
38#include "karma.h"
38 39
39#ifdef CONFIG_BLK_DEV_MD 40#ifdef CONFIG_BLK_DEV_MD
40extern void md_autodetect_dev(dev_t dev); 41extern void md_autodetect_dev(dev_t dev);
@@ -103,6 +104,9 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
103#ifdef CONFIG_IBM_PARTITION 104#ifdef CONFIG_IBM_PARTITION
104 ibm_partition, 105 ibm_partition,
105#endif 106#endif
107#ifdef CONFIG_KARMA_PARTITION
108 karma_partition,
109#endif
106 NULL 110 NULL
107}; 111};
108 112
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c
new file mode 100644
index 000000000000..176d89bcf123
--- /dev/null
+++ b/fs/partitions/karma.c
@@ -0,0 +1,57 @@
1/*
2 * fs/partitions/karma.c
3 * Rio Karma partition info.
4 *
5 * Copyright (C) 2006 Bob Copeland (me@bobcopeland.com)
6 * based on osf.c
7 */
8
9#include "check.h"
10#include "karma.h"
11
12int karma_partition(struct parsed_partitions *state, struct block_device *bdev)
13{
14 int i;
15 int slot = 1;
16 Sector sect;
17 unsigned char *data;
18 struct disklabel {
19 u8 d_reserved[270];
20 struct d_partition {
21 __le32 p_res;
22 u8 p_fstype;
23 u8 p_res2[3];
24 __le32 p_offset;
25 __le32 p_size;
26 } d_partitions[2];
27 u8 d_blank[208];
28 __le16 d_magic;
29 } __attribute__((packed)) *label;
30 struct d_partition *p;
31
32 data = read_dev_sector(bdev, 0, &sect);
33 if (!data)
34 return -1;
35
36 label = (struct disklabel *)data;
37 if (le16_to_cpu(label->d_magic) != KARMA_LABEL_MAGIC) {
38 put_dev_sector(sect);
39 return 0;
40 }
41
42 p = label->d_partitions;
43 for (i = 0 ; i < 2; i++, p++) {
44 if (slot == state->limit)
45 break;
46
47 if (p->p_fstype == 0x4d && le32_to_cpu(p->p_size)) {
48 put_partition(state, slot, le32_to_cpu(p->p_offset),
49 le32_to_cpu(p->p_size));
50 }
51 slot++;
52 }
53 printk("\n");
54 put_dev_sector(sect);
55 return 1;
56}
57
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h
new file mode 100644
index 000000000000..ecf7d3f2a3d8
--- /dev/null
+++ b/fs/partitions/karma.h
@@ -0,0 +1,8 @@
1/*
2 * fs/partitions/karma.h
3 */
4
5#define KARMA_LABEL_MAGIC 0xAB56
6
7int karma_partition(struct parsed_partitions *state, struct block_device *bdev);
8
diff --git a/fs/pipe.c b/fs/pipe.c
index 66aa0b938d6a..d722579df79a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -44,13 +44,13 @@ void pipe_wait(struct inode * inode)
44 * is considered a noninteractive wait: 44 * is considered a noninteractive wait:
45 */ 45 */
46 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); 46 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
47 up(PIPE_SEM(*inode)); 47 mutex_unlock(PIPE_MUTEX(*inode));
48 schedule(); 48 schedule();
49 finish_wait(PIPE_WAIT(*inode), &wait); 49 finish_wait(PIPE_WAIT(*inode), &wait);
50 down(PIPE_SEM(*inode)); 50 mutex_lock(PIPE_MUTEX(*inode));
51} 51}
52 52
53static inline int 53static int
54pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) 54pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
55{ 55{
56 unsigned long copy; 56 unsigned long copy;
@@ -70,7 +70,7 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
70 return 0; 70 return 0;
71} 71}
72 72
73static inline int 73static int
74pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) 74pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
75{ 75{
76 unsigned long copy; 76 unsigned long copy;
@@ -136,7 +136,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
136 136
137 do_wakeup = 0; 137 do_wakeup = 0;
138 ret = 0; 138 ret = 0;
139 down(PIPE_SEM(*inode)); 139 mutex_lock(PIPE_MUTEX(*inode));
140 info = inode->i_pipe; 140 info = inode->i_pipe;
141 for (;;) { 141 for (;;) {
142 int bufs = info->nrbufs; 142 int bufs = info->nrbufs;
@@ -200,7 +200,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
200 } 200 }
201 pipe_wait(inode); 201 pipe_wait(inode);
202 } 202 }
203 up(PIPE_SEM(*inode)); 203 mutex_unlock(PIPE_MUTEX(*inode));
204 /* Signal writers asynchronously that there is more room. */ 204 /* Signal writers asynchronously that there is more room. */
205 if (do_wakeup) { 205 if (do_wakeup) {
206 wake_up_interruptible(PIPE_WAIT(*inode)); 206 wake_up_interruptible(PIPE_WAIT(*inode));
@@ -237,7 +237,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
237 237
238 do_wakeup = 0; 238 do_wakeup = 0;
239 ret = 0; 239 ret = 0;
240 down(PIPE_SEM(*inode)); 240 mutex_lock(PIPE_MUTEX(*inode));
241 info = inode->i_pipe; 241 info = inode->i_pipe;
242 242
243 if (!PIPE_READERS(*inode)) { 243 if (!PIPE_READERS(*inode)) {
@@ -341,13 +341,13 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
341 PIPE_WAITING_WRITERS(*inode)--; 341 PIPE_WAITING_WRITERS(*inode)--;
342 } 342 }
343out: 343out:
344 up(PIPE_SEM(*inode)); 344 mutex_unlock(PIPE_MUTEX(*inode));
345 if (do_wakeup) { 345 if (do_wakeup) {
346 wake_up_interruptible(PIPE_WAIT(*inode)); 346 wake_up_interruptible(PIPE_WAIT(*inode));
347 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 347 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
348 } 348 }
349 if (ret > 0) 349 if (ret > 0)
350 inode_update_time(inode, 1); /* mtime and ctime */ 350 file_update_time(filp);
351 return ret; 351 return ret;
352} 352}
353 353
@@ -381,7 +381,7 @@ pipe_ioctl(struct inode *pino, struct file *filp,
381 381
382 switch (cmd) { 382 switch (cmd) {
383 case FIONREAD: 383 case FIONREAD:
384 down(PIPE_SEM(*inode)); 384 mutex_lock(PIPE_MUTEX(*inode));
385 info = inode->i_pipe; 385 info = inode->i_pipe;
386 count = 0; 386 count = 0;
387 buf = info->curbuf; 387 buf = info->curbuf;
@@ -390,7 +390,7 @@ pipe_ioctl(struct inode *pino, struct file *filp,
390 count += info->bufs[buf].len; 390 count += info->bufs[buf].len;
391 buf = (buf+1) & (PIPE_BUFFERS-1); 391 buf = (buf+1) & (PIPE_BUFFERS-1);
392 } 392 }
393 up(PIPE_SEM(*inode)); 393 mutex_unlock(PIPE_MUTEX(*inode));
394 return put_user(count, (int __user *)arg); 394 return put_user(count, (int __user *)arg);
395 default: 395 default:
396 return -EINVAL; 396 return -EINVAL;
@@ -433,7 +433,7 @@ pipe_poll(struct file *filp, poll_table *wait)
433static int 433static int
434pipe_release(struct inode *inode, int decr, int decw) 434pipe_release(struct inode *inode, int decr, int decw)
435{ 435{
436 down(PIPE_SEM(*inode)); 436 mutex_lock(PIPE_MUTEX(*inode));
437 PIPE_READERS(*inode) -= decr; 437 PIPE_READERS(*inode) -= decr;
438 PIPE_WRITERS(*inode) -= decw; 438 PIPE_WRITERS(*inode) -= decw;
439 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { 439 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
@@ -443,7 +443,7 @@ pipe_release(struct inode *inode, int decr, int decw)
443 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 443 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
444 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 444 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
445 } 445 }
446 up(PIPE_SEM(*inode)); 446 mutex_unlock(PIPE_MUTEX(*inode));
447 447
448 return 0; 448 return 0;
449} 449}
@@ -454,9 +454,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
454 struct inode *inode = filp->f_dentry->d_inode; 454 struct inode *inode = filp->f_dentry->d_inode;
455 int retval; 455 int retval;
456 456
457 down(PIPE_SEM(*inode)); 457 mutex_lock(PIPE_MUTEX(*inode));
458 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 458 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
459 up(PIPE_SEM(*inode)); 459 mutex_unlock(PIPE_MUTEX(*inode));
460 460
461 if (retval < 0) 461 if (retval < 0)
462 return retval; 462 return retval;
@@ -471,9 +471,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
471 struct inode *inode = filp->f_dentry->d_inode; 471 struct inode *inode = filp->f_dentry->d_inode;
472 int retval; 472 int retval;
473 473
474 down(PIPE_SEM(*inode)); 474 mutex_lock(PIPE_MUTEX(*inode));
475 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 475 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
476 up(PIPE_SEM(*inode)); 476 mutex_unlock(PIPE_MUTEX(*inode));
477 477
478 if (retval < 0) 478 if (retval < 0)
479 return retval; 479 return retval;
@@ -488,14 +488,14 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
488 struct inode *inode = filp->f_dentry->d_inode; 488 struct inode *inode = filp->f_dentry->d_inode;
489 int retval; 489 int retval;
490 490
491 down(PIPE_SEM(*inode)); 491 mutex_lock(PIPE_MUTEX(*inode));
492 492
493 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 493 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
494 494
495 if (retval >= 0) 495 if (retval >= 0)
496 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 496 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
497 497
498 up(PIPE_SEM(*inode)); 498 mutex_unlock(PIPE_MUTEX(*inode));
499 499
500 if (retval < 0) 500 if (retval < 0)
501 return retval; 501 return retval;
@@ -534,9 +534,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
534{ 534{
535 /* We could have perhaps used atomic_t, but this and friends 535 /* We could have perhaps used atomic_t, but this and friends
536 below are the only places. So it doesn't seem worthwhile. */ 536 below are the only places. So it doesn't seem worthwhile. */
537 down(PIPE_SEM(*inode)); 537 mutex_lock(PIPE_MUTEX(*inode));
538 PIPE_READERS(*inode)++; 538 PIPE_READERS(*inode)++;
539 up(PIPE_SEM(*inode)); 539 mutex_unlock(PIPE_MUTEX(*inode));
540 540
541 return 0; 541 return 0;
542} 542}
@@ -544,9 +544,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
544static int 544static int
545pipe_write_open(struct inode *inode, struct file *filp) 545pipe_write_open(struct inode *inode, struct file *filp)
546{ 546{
547 down(PIPE_SEM(*inode)); 547 mutex_lock(PIPE_MUTEX(*inode));
548 PIPE_WRITERS(*inode)++; 548 PIPE_WRITERS(*inode)++;
549 up(PIPE_SEM(*inode)); 549 mutex_unlock(PIPE_MUTEX(*inode));
550 550
551 return 0; 551 return 0;
552} 552}
@@ -554,12 +554,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
554static int 554static int
555pipe_rdwr_open(struct inode *inode, struct file *filp) 555pipe_rdwr_open(struct inode *inode, struct file *filp)
556{ 556{
557 down(PIPE_SEM(*inode)); 557 mutex_lock(PIPE_MUTEX(*inode));
558 if (filp->f_mode & FMODE_READ) 558 if (filp->f_mode & FMODE_READ)
559 PIPE_READERS(*inode)++; 559 PIPE_READERS(*inode)++;
560 if (filp->f_mode & FMODE_WRITE) 560 if (filp->f_mode & FMODE_WRITE)
561 PIPE_WRITERS(*inode)++; 561 PIPE_WRITERS(*inode)++;
562 up(PIPE_SEM(*inode)); 562 mutex_unlock(PIPE_MUTEX(*inode));
563 563
564 return 0; 564 return 0;
565} 565}
diff --git a/fs/pnode.c b/fs/pnode.c
index aeeec8ba8dd2..f1871f773f64 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
103 struct vfsmount *next; 103 struct vfsmount *next;
104 struct vfsmount *master = m->mnt_master; 104 struct vfsmount *master = m->mnt_master;
105 105
106 if ( master == origin->mnt_master ) { 106 if (master == origin->mnt_master) {
107 next = next_peer(m); 107 next = next_peer(m);
108 return ((next == origin) ? NULL : next); 108 return ((next == origin) ? NULL : next);
109 } else if (m->mnt_slave.next != &master->mnt_slave_list) 109 } else if (m->mnt_slave.next != &master->mnt_slave_list)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5e9251f65317..7eb1bd7f800c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -330,7 +330,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
330 unsigned long min_flt = 0, maj_flt = 0; 330 unsigned long min_flt = 0, maj_flt = 0;
331 cputime_t cutime, cstime, utime, stime; 331 cputime_t cutime, cstime, utime, stime;
332 unsigned long rsslim = 0; 332 unsigned long rsslim = 0;
333 unsigned long it_real_value = 0; 333 DEFINE_KTIME(it_real_value);
334 struct task_struct *t; 334 struct task_struct *t;
335 char tcomm[sizeof(task->comm)]; 335 char tcomm[sizeof(task->comm)];
336 336
@@ -386,7 +386,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
386 utime = cputime_add(utime, task->signal->utime); 386 utime = cputime_add(utime, task->signal->utime);
387 stime = cputime_add(stime, task->signal->stime); 387 stime = cputime_add(stime, task->signal->stime);
388 } 388 }
389 it_real_value = task->signal->it_real_value; 389 it_real_value = task->signal->real_timer.expires;
390 } 390 }
391 ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; 391 ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
392 read_unlock(&tasklist_lock); 392 read_unlock(&tasklist_lock);
@@ -435,7 +435,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
435 priority, 435 priority,
436 nice, 436 nice,
437 num_threads, 437 num_threads,
438 jiffies_to_clock_t(it_real_value), 438 (long) ktime_to_clock_t(it_real_value),
439 start_time, 439 start_time,
440 vsize, 440 vsize,
441 mm ? get_mm_rss(mm) : 0, 441 mm ? get_mm_rss(mm) : 0,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 634355e16986..20feb7568deb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -55,6 +55,7 @@
55#include <linux/proc_fs.h> 55#include <linux/proc_fs.h>
56#include <linux/stat.h> 56#include <linux/stat.h>
57#include <linux/init.h> 57#include <linux/init.h>
58#include <linux/capability.h>
58#include <linux/file.h> 59#include <linux/file.h>
59#include <linux/string.h> 60#include <linux/string.h>
60#include <linux/seq_file.h> 61#include <linux/seq_file.h>
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 72b431d0a0a4..20e5c4509a43 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -21,6 +21,8 @@
21#include <linux/bitops.h> 21#include <linux/bitops.h>
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23 23
24#include "internal.h"
25
24static ssize_t proc_file_read(struct file *file, char __user *buf, 26static ssize_t proc_file_read(struct file *file, char __user *buf,
25 size_t nbytes, loff_t *ppos); 27 size_t nbytes, loff_t *ppos);
26static ssize_t proc_file_write(struct file *file, const char __user *buffer, 28static ssize_t proc_file_write(struct file *file, const char __user *buffer,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e6a818a93f3d..6573f31f1fd9 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -19,7 +19,7 @@
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21 21
22extern void free_proc_entry(struct proc_dir_entry *); 22#include "internal.h"
23 23
24static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) 24static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
25{ 25{
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e55198f9806..95a1cf32b838 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -37,6 +37,10 @@ extern int proc_tgid_stat(struct task_struct *, char *);
37extern int proc_pid_status(struct task_struct *, char *); 37extern int proc_pid_status(struct task_struct *, char *);
38extern int proc_pid_statm(struct task_struct *, char *); 38extern int proc_pid_statm(struct task_struct *, char *);
39 39
40void free_proc_entry(struct proc_dir_entry *de);
41
42int proc_init_inodecache(void);
43
40static inline struct task_struct *proc_task(struct inode *inode) 44static inline struct task_struct *proc_task(struct inode *inode)
41{ 45{
42 return PROC_I(inode)->task; 46 return PROC_I(inode)->task;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1c7da988fcc3..adc2cd95169a 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -14,6 +14,7 @@
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/user.h> 15#include <linux/user.h>
16#include <linux/a.out.h> 16#include <linux/a.out.h>
17#include <linux/capability.h>
17#include <linux/elf.h> 18#include <linux/elf.h>
18#include <linux/elfcore.h> 19#include <linux/elfcore.h>
19#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index fb117b74809e..9bdd077d6f55 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -81,6 +81,30 @@ void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop
81 __proc_device_tree_add_prop(pde, prop); 81 __proc_device_tree_add_prop(pde, prop);
82} 82}
83 83
84void proc_device_tree_remove_prop(struct proc_dir_entry *pde,
85 struct property *prop)
86{
87 remove_proc_entry(prop->name, pde);
88}
89
90void proc_device_tree_update_prop(struct proc_dir_entry *pde,
91 struct property *newprop,
92 struct property *oldprop)
93{
94 struct proc_dir_entry *ent;
95
96 for (ent = pde->subdir; ent != NULL; ent = ent->next)
97 if (ent->data == oldprop)
98 break;
99 if (ent == NULL) {
100 printk(KERN_WARNING "device-tree: property \"%s\" "
101 " does not exist\n", oldprop->name);
102 } else {
103 ent->data = newprop;
104 ent->size = newprop->length;
105 }
106}
107
84/* 108/*
85 * Process a node, adding entries for its children and its properties. 109 * Process a node, adding entries for its children and its properties.
86 */ 110 */
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5b6b0b6038a7..8f8014285a34 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -20,6 +20,7 @@
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/kernel_stat.h> 22#include <linux/kernel_stat.h>
23#include <linux/fs.h>
23#include <linux/tty.h> 24#include <linux/tty.h>
24#include <linux/string.h> 25#include <linux/string.h>
25#include <linux/mman.h> 26#include <linux/mman.h>
@@ -62,7 +63,6 @@
62 */ 63 */
63extern int get_hardware_list(char *); 64extern int get_hardware_list(char *);
64extern int get_stram_list(char *); 65extern int get_stram_list(char *);
65extern int get_chrdev_list(char *);
66extern int get_filesystem_list(char *); 66extern int get_filesystem_list(char *);
67extern int get_exec_domain_list(char *); 67extern int get_exec_domain_list(char *);
68extern int get_dma_list(char *); 68extern int get_dma_list(char *);
@@ -248,6 +248,154 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
248{ 248{
249 return seq_open(file, &cpuinfo_op); 249 return seq_open(file, &cpuinfo_op);
250} 250}
251
/* Where the /proc/devices iterator currently is within its output. */
enum devinfo_states {
	CHR_HDR = 0,	/* about to emit the "Character devices:" header */
	CHR_LIST,	/* walking the character device list */
	BLK_HDR,	/* about to emit the "Block devices:" header */
	BLK_LIST,	/* walking the block device list */
	DEVINFO_DONE	/* both lists finished */
};
259
260struct devinfo_state {
261 void *chrdev;
262 void *blkdev;
263 unsigned int num_records;
264 unsigned int cur_record;
265 enum devinfo_states state;
266};
267
268static void *devinfo_start(struct seq_file *f, loff_t *pos)
269{
270 struct devinfo_state *info = f->private;
271
272 if (*pos) {
273 if ((info) && (*pos <= info->num_records))
274 return info;
275 return NULL;
276 }
277 info = kmalloc(sizeof(*info), GFP_KERNEL);
278 f->private = info;
279 info->chrdev = acquire_chrdev_list();
280 info->blkdev = acquire_blkdev_list();
281 info->state = CHR_HDR;
282 info->num_records = count_chrdev_list();
283 info->num_records += count_blkdev_list();
284 info->num_records += 2; /* Character and Block headers */
285 *pos = 1;
286 info->cur_record = *pos;
287 return info;
288}
289
290static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
291{
292 int idummy;
293 char *ndummy;
294 struct devinfo_state *info = f->private;
295
296 switch (info->state) {
297 case CHR_HDR:
298 info->state = CHR_LIST;
299 (*pos)++;
300 /*fallthrough*/
301 case CHR_LIST:
302 if (get_chrdev_info(info->chrdev,&idummy,&ndummy)) {
303 /*
304 * The character dev list is complete
305 */
306 info->state = BLK_HDR;
307 } else {
308 info->chrdev = get_next_chrdev(info->chrdev);
309 }
310 (*pos)++;
311 break;
312 case BLK_HDR:
313 info->state = BLK_LIST;
314 (*pos)++;
315 break;
316 case BLK_LIST:
317 if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) {
318 /*
319 * The block dev list is complete
320 */
321 info->state = DEVINFO_DONE;
322 } else {
323 info->blkdev = get_next_blkdev(info->blkdev);
324 }
325 (*pos)++;
326 break;
327 case DEVINFO_DONE:
328 (*pos)++;
329 info->cur_record = *pos;
330 info = NULL;
331 break;
332 default:
333 break;
334 }
335 if (info)
336 info->cur_record = *pos;
337 return info;
338}
339
340static void devinfo_stop(struct seq_file *f, void *v)
341{
342 struct devinfo_state *info = f->private;
343
344 if (info) {
345 release_chrdev_list(info->chrdev);
346 release_blkdev_list(info->blkdev);
347 f->private = NULL;
348 kfree(info);
349 }
350}
351
352static int devinfo_show(struct seq_file *f, void *arg)
353{
354 int major;
355 char *name;
356 struct devinfo_state *info = f->private;
357
358 switch(info->state) {
359 case CHR_HDR:
360 seq_printf(f,"Character devices:\n");
361 /* fallthrough */
362 case CHR_LIST:
363 if (!get_chrdev_info(info->chrdev,&major,&name))
364 seq_printf(f,"%3d %s\n",major,name);
365 break;
366 case BLK_HDR:
367 seq_printf(f,"\nBlock devices:\n");
368 /* fallthrough */
369 case BLK_LIST:
370 if (!get_blkdev_info(info->blkdev,&major,&name))
371 seq_printf(f,"%3d %s\n",major,name);
372 break;
373 default:
374 break;
375 }
376
377 return 0;
378}
379
380static struct seq_operations devinfo_op = {
381 .start = devinfo_start,
382 .next = devinfo_next,
383 .stop = devinfo_stop,
384 .show = devinfo_show,
385};
386
387static int devinfo_open(struct inode *inode, struct file *file)
388{
389 return seq_open(file, &devinfo_op);
390}
391
392static struct file_operations proc_devinfo_operations = {
393 .open = devinfo_open,
394 .read = seq_read,
395 .llseek = seq_lseek,
396 .release = seq_release,
397};
398
251static struct file_operations proc_cpuinfo_operations = { 399static struct file_operations proc_cpuinfo_operations = {
252 .open = cpuinfo_open, 400 .open = cpuinfo_open,
253 .read = seq_read, 401 .read = seq_read,
@@ -323,6 +471,7 @@ static struct file_operations proc_modules_operations = {
323}; 471};
324#endif 472#endif
325 473
474#ifdef CONFIG_SLAB
326extern struct seq_operations slabinfo_op; 475extern struct seq_operations slabinfo_op;
327extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); 476extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
328static int slabinfo_open(struct inode *inode, struct file *file) 477static int slabinfo_open(struct inode *inode, struct file *file)
@@ -336,6 +485,7 @@ static struct file_operations proc_slabinfo_operations = {
336 .llseek = seq_lseek, 485 .llseek = seq_lseek,
337 .release = seq_release, 486 .release = seq_release,
338}; 487};
488#endif
339 489
340static int show_stat(struct seq_file *p, void *v) 490static int show_stat(struct seq_file *p, void *v)
341{ 491{
@@ -448,14 +598,6 @@ static struct file_operations proc_stat_operations = {
448 .release = single_release, 598 .release = single_release,
449}; 599};
450 600
451static int devices_read_proc(char *page, char **start, off_t off,
452 int count, int *eof, void *data)
453{
454 int len = get_chrdev_list(page);
455 len += get_blkdev_list(page+len, len);
456 return proc_calc_metrics(page, start, off, count, eof, len);
457}
458
459/* 601/*
460 * /proc/interrupts 602 * /proc/interrupts
461 */ 603 */
@@ -580,7 +722,6 @@ void __init proc_misc_init(void)
580#ifdef CONFIG_STRAM_PROC 722#ifdef CONFIG_STRAM_PROC
581 {"stram", stram_read_proc}, 723 {"stram", stram_read_proc},
582#endif 724#endif
583 {"devices", devices_read_proc},
584 {"filesystems", filesystems_read_proc}, 725 {"filesystems", filesystems_read_proc},
585 {"cmdline", cmdline_read_proc}, 726 {"cmdline", cmdline_read_proc},
586 {"locks", locks_read_proc}, 727 {"locks", locks_read_proc},
@@ -596,11 +737,14 @@ void __init proc_misc_init(void)
596 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); 737 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
597 if (entry) 738 if (entry)
598 entry->proc_fops = &proc_kmsg_operations; 739 entry->proc_fops = &proc_kmsg_operations;
740 create_seq_entry("devices", 0, &proc_devinfo_operations);
599 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 741 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
600 create_seq_entry("partitions", 0, &proc_partitions_operations); 742 create_seq_entry("partitions", 0, &proc_partitions_operations);
601 create_seq_entry("stat", 0, &proc_stat_operations); 743 create_seq_entry("stat", 0, &proc_stat_operations);
602 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 744 create_seq_entry("interrupts", 0, &proc_interrupts_operations);
745#ifdef CONFIG_SLAB
603 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); 746 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
747#endif
604 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 748 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
605 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 749 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
606 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 750 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index aef148f099a2..68896283c8ae 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,8 @@
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20 20
21#include "internal.h"
22
21struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; 23struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
22 24
23#ifdef CONFIG_SYSCTL 25#ifdef CONFIG_SYSCTL
@@ -36,7 +38,6 @@ static struct file_system_type proc_fs_type = {
36 .kill_sb = kill_anon_super, 38 .kill_sb = kill_anon_super,
37}; 39};
38 40
39extern int __init proc_init_inodecache(void);
40void __init proc_root_init(void) 41void __init proc_root_init(void)
41{ 42{
42 int err = proc_init_inodecache(); 43 int err = proc_init_inodecache();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 50bd5a8f0446..0eaad41f4658 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -390,129 +390,12 @@ struct seq_operations proc_pid_smaps_op = {
390}; 390};
391 391
392#ifdef CONFIG_NUMA 392#ifdef CONFIG_NUMA
393 393extern int show_numa_map(struct seq_file *m, void *v);
394struct numa_maps {
395 unsigned long pages;
396 unsigned long anon;
397 unsigned long mapped;
398 unsigned long mapcount_max;
399 unsigned long node[MAX_NUMNODES];
400};
401
402/*
403 * Calculate numa node maps for a vma
404 */
405static struct numa_maps *get_numa_maps(struct vm_area_struct *vma)
406{
407 int i;
408 struct page *page;
409 unsigned long vaddr;
410 struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
411
412 if (!md)
413 return NULL;
414 md->pages = 0;
415 md->anon = 0;
416 md->mapped = 0;
417 md->mapcount_max = 0;
418 for_each_node(i)
419 md->node[i] =0;
420
421 for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
422 page = follow_page(vma, vaddr, 0);
423 if (page) {
424 int count = page_mapcount(page);
425
426 if (count)
427 md->mapped++;
428 if (count > md->mapcount_max)
429 md->mapcount_max = count;
430 md->pages++;
431 if (PageAnon(page))
432 md->anon++;
433 md->node[page_to_nid(page)]++;
434 }
435 cond_resched();
436 }
437 return md;
438}
439
440static int show_numa_map(struct seq_file *m, void *v)
441{
442 struct task_struct *task = m->private;
443 struct vm_area_struct *vma = v;
444 struct mempolicy *pol;
445 struct numa_maps *md;
446 struct zone **z;
447 int n;
448 int first;
449
450 if (!vma->vm_mm)
451 return 0;
452
453 md = get_numa_maps(vma);
454 if (!md)
455 return 0;
456
457 seq_printf(m, "%08lx", vma->vm_start);
458 pol = get_vma_policy(task, vma, vma->vm_start);
459 /* Print policy */
460 switch (pol->policy) {
461 case MPOL_PREFERRED:
462 seq_printf(m, " prefer=%d", pol->v.preferred_node);
463 break;
464 case MPOL_BIND:
465 seq_printf(m, " bind={");
466 first = 1;
467 for (z = pol->v.zonelist->zones; *z; z++) {
468
469 if (!first)
470 seq_putc(m, ',');
471 else
472 first = 0;
473 seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
474 (*z)->name);
475 }
476 seq_putc(m, '}');
477 break;
478 case MPOL_INTERLEAVE:
479 seq_printf(m, " interleave={");
480 first = 1;
481 for_each_node(n) {
482 if (node_isset(n, pol->v.nodes)) {
483 if (!first)
484 seq_putc(m,',');
485 else
486 first = 0;
487 seq_printf(m, "%d",n);
488 }
489 }
490 seq_putc(m, '}');
491 break;
492 default:
493 seq_printf(m," default");
494 break;
495 }
496 seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
497 md->mapcount_max, md->pages, md->mapped);
498 if (md->anon)
499 seq_printf(m," Anon=%lu",md->anon);
500
501 for_each_online_node(n) {
502 if (md->node[n])
503 seq_printf(m, " N%d=%lu", n, md->node[n]);
504 }
505 seq_putc(m, '\n');
506 kfree(md);
507 if (m->count < m->size) /* vma is copied successfully */
508 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
509 return 0;
510}
511 394
512struct seq_operations proc_pid_numa_maps_op = { 395struct seq_operations proc_pid_numa_maps_op = {
513 .start = m_start, 396 .start = m_start,
514 .next = m_next, 397 .next = m_next,
515 .stop = m_stop, 398 .stop = m_stop,
516 .show = show_numa_map 399 .show = show_numa_map
517}; 400};
518#endif 401#endif
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 3b2e7b69e63a..4063fb32f78c 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -14,7 +14,6 @@
14#include <linux/a.out.h> 14#include <linux/a.out.h>
15#include <linux/elf.h> 15#include <linux/elf.h>
16#include <linux/elfcore.h> 16#include <linux/elfcore.h>
17#include <linux/proc_fs.h>
18#include <linux/highmem.h> 17#include <linux/highmem.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/init.h> 19#include <linux/init.h>
@@ -35,11 +34,14 @@ static size_t elfcorebuf_sz;
35/* Total size of vmcore file. */ 34/* Total size of vmcore file. */
36static u64 vmcore_size; 35static u64 vmcore_size;
37 36
37/* Stores the physical address of elf header of crash image. */
38unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
39
38struct proc_dir_entry *proc_vmcore = NULL; 40struct proc_dir_entry *proc_vmcore = NULL;
39 41
40/* Reads a page from the oldmem device from given offset. */ 42/* Reads a page from the oldmem device from given offset. */
41static ssize_t read_from_oldmem(char *buf, size_t count, 43static ssize_t read_from_oldmem(char *buf, size_t count,
42 loff_t *ppos, int userbuf) 44 u64 *ppos, int userbuf)
43{ 45{
44 unsigned long pfn, offset; 46 unsigned long pfn, offset;
45 size_t nr_bytes; 47 size_t nr_bytes;
diff --git a/fs/quota.c b/fs/quota.c
index 612e04db4b93..ba9e0bf32f67 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -15,6 +15,7 @@
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/capability.h>
18#include <linux/quotaops.h> 19#include <linux/quotaops.h>
19 20
20/* Check validity of generic quotactl commands */ 21/* Check validity of generic quotactl commands */
@@ -168,7 +169,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
168 sync_blockdev(sb->s_bdev); 169 sync_blockdev(sb->s_bdev);
169 170
170 /* Now when everything is written we can discard the pagecache so 171 /* Now when everything is written we can discard the pagecache so
171 * that userspace sees the changes. We need i_sem and so we could 172 * that userspace sees the changes. We need i_mutex and so we could
172 * not do it inside dqonoff_sem. Moreover we need to be careful 173 * not do it inside dqonoff_sem. Moreover we need to be careful
173 * about races with quotaoff() (that is the reason why we have own 174 * about races with quotaoff() (that is the reason why we have own
174 * reference to inode). */ 175 * reference to inode). */
@@ -184,9 +185,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
184 up(&sb_dqopt(sb)->dqonoff_sem); 185 up(&sb_dqopt(sb)->dqonoff_sem);
185 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 186 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
186 if (discard[cnt]) { 187 if (discard[cnt]) {
187 down(&discard[cnt]->i_sem); 188 mutex_lock(&discard[cnt]->i_mutex);
188 truncate_inode_pages(&discard[cnt]->i_data, 0); 189 truncate_inode_pages(&discard[cnt]->i_data, 0);
189 up(&discard[cnt]->i_sem); 190 mutex_unlock(&discard[cnt]->i_mutex);
190 iput(discard[cnt]); 191 iput(discard[cnt]);
191 } 192 }
192 } 193 }
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 7afcbb1b9376..a4ef91bb4f3b 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -35,7 +35,8 @@ static int v2_check_quota_file(struct super_block *sb, int type)
35 35
36 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); 36 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
37 if (size != sizeof(struct v2_disk_dqheader)) { 37 if (size != sizeof(struct v2_disk_dqheader)) {
38 printk("failed read\n"); 38 printk("quota_v2: failed read expected=%d got=%d\n",
39 sizeof(struct v2_disk_dqheader), size);
39 return 0; 40 return 0;
40 } 41 }
41 if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || 42 if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
diff --git a/fs/read_write.c b/fs/read_write.c
index df3468a22fea..3f7a1a62165f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -33,7 +33,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
33 long long retval; 33 long long retval;
34 struct inode *inode = file->f_mapping->host; 34 struct inode *inode = file->f_mapping->host;
35 35
36 down(&inode->i_sem); 36 mutex_lock(&inode->i_mutex);
37 switch (origin) { 37 switch (origin) {
38 case 2: 38 case 2:
39 offset += inode->i_size; 39 offset += inode->i_size;
@@ -49,7 +49,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
49 } 49 }
50 retval = offset; 50 retval = offset;
51 } 51 }
52 up(&inode->i_sem); 52 mutex_unlock(&inode->i_mutex);
53 return retval; 53 return retval;
54} 54}
55 55
diff --git a/fs/readdir.c b/fs/readdir.c
index b03579bc0210..b6109329b607 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -30,13 +30,13 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf)
30 if (res) 30 if (res)
31 goto out; 31 goto out;
32 32
33 down(&inode->i_sem); 33 mutex_lock(&inode->i_mutex);
34 res = -ENOENT; 34 res = -ENOENT;
35 if (!IS_DEADDIR(inode)) { 35 if (!IS_DEADDIR(inode)) {
36 res = file->f_op->readdir(file, buf, filler); 36 res = file->f_op->readdir(file, buf, filler);
37 file_accessed(file); 37 file_accessed(file);
38 } 38 }
39 up(&inode->i_sem); 39 mutex_unlock(&inode->i_mutex);
40out: 40out:
41 return res; 41 return res;
42} 42}
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7892a865b58a..ad6fa964b0e7 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -49,7 +49,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
49 } 49 }
50 50
51 reiserfs_write_lock(inode->i_sb); 51 reiserfs_write_lock(inode->i_sb);
52 down(&inode->i_sem); 52 mutex_lock(&inode->i_mutex);
53 /* freeing preallocation only involves relogging blocks that 53 /* freeing preallocation only involves relogging blocks that
54 * are already in the current transaction. preallocation gets 54 * are already in the current transaction. preallocation gets
55 * freed at the end of each transaction, so it is impossible for 55 * freed at the end of each transaction, so it is impossible for
@@ -100,7 +100,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
100 err = reiserfs_truncate_file(inode, 0); 100 err = reiserfs_truncate_file(inode, 0);
101 } 101 }
102 out: 102 out:
103 up(&inode->i_sem); 103 mutex_unlock(&inode->i_mutex);
104 reiserfs_write_unlock(inode->i_sb); 104 reiserfs_write_unlock(inode->i_sb);
105 return err; 105 return err;
106} 106}
@@ -1342,7 +1342,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1342 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 1342 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1343 return -EFAULT; 1343 return -EFAULT;
1344 1344
1345 down(&inode->i_sem); // locks the entire file for just us 1345 mutex_lock(&inode->i_mutex); // locks the entire file for just us
1346 1346
1347 pos = *ppos; 1347 pos = *ppos;
1348 1348
@@ -1360,7 +1360,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1360 if (res) 1360 if (res)
1361 goto out; 1361 goto out;
1362 1362
1363 inode_update_time(inode, 1); /* Both mtime and ctime */ 1363 file_update_time(file);
1364 1364
1365 // Ok, we are done with all the checks. 1365 // Ok, we are done with all the checks.
1366 1366
@@ -1532,12 +1532,12 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1532 generic_osync_inode(inode, file->f_mapping, 1532 generic_osync_inode(inode, file->f_mapping,
1533 OSYNC_METADATA | OSYNC_DATA); 1533 OSYNC_METADATA | OSYNC_DATA);
1534 1534
1535 up(&inode->i_sem); 1535 mutex_unlock(&inode->i_mutex);
1536 reiserfs_async_progress_wait(inode->i_sb); 1536 reiserfs_async_progress_wait(inode->i_sb);
1537 return (already_written != 0) ? already_written : res; 1537 return (already_written != 0) ? already_written : res;
1538 1538
1539 out: 1539 out:
1540 up(&inode->i_sem); // unlock the file on exit. 1540 mutex_unlock(&inode->i_mutex); // unlock the file on exit.
1541 return res; 1541 return res;
1542} 1542}
1543 1543
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a5e3a0ddbe53..ffa34b861bdb 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -40,12 +40,12 @@ void reiserfs_delete_inode(struct inode *inode)
40 40
41 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 41 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
42 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 42 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
43 down(&inode->i_sem); 43 mutex_lock(&inode->i_mutex);
44 44
45 reiserfs_delete_xattrs(inode); 45 reiserfs_delete_xattrs(inode);
46 46
47 if (journal_begin(&th, inode->i_sb, jbegin_count)) { 47 if (journal_begin(&th, inode->i_sb, jbegin_count)) {
48 up(&inode->i_sem); 48 mutex_unlock(&inode->i_mutex);
49 goto out; 49 goto out;
50 } 50 }
51 reiserfs_update_inode_transaction(inode); 51 reiserfs_update_inode_transaction(inode);
@@ -59,11 +59,11 @@ void reiserfs_delete_inode(struct inode *inode)
59 DQUOT_FREE_INODE(inode); 59 DQUOT_FREE_INODE(inode);
60 60
61 if (journal_end(&th, inode->i_sb, jbegin_count)) { 61 if (journal_end(&th, inode->i_sb, jbegin_count)) {
62 up(&inode->i_sem); 62 mutex_unlock(&inode->i_mutex);
63 goto out; 63 goto out;
64 } 64 }
65 65
66 up(&inode->i_sem); 66 mutex_unlock(&inode->i_mutex);
67 67
68 /* check return value from reiserfs_delete_object after 68 /* check return value from reiserfs_delete_object after
69 * ending the transaction 69 * ending the transaction
@@ -551,7 +551,7 @@ static int convert_tail_for_hole(struct inode *inode,
551 551
552 /* we don't have to make sure the conversion did not happen while 552 /* we don't have to make sure the conversion did not happen while
553 ** we were locking the page because anyone that could convert 553 ** we were locking the page because anyone that could convert
554 ** must first take i_sem. 554 ** must first take i_mutex.
555 ** 555 **
556 ** We must fix the tail page for writing because it might have buffers 556 ** We must fix the tail page for writing because it might have buffers
557 ** that are mapped, but have a block number of 0. This indicates tail 557 ** that are mapped, but have a block number of 0. This indicates tail
@@ -586,7 +586,7 @@ static inline int _allocate_block(struct reiserfs_transaction_handle *th,
586 BUG_ON(!th->t_trans_id); 586 BUG_ON(!th->t_trans_id);
587 587
588#ifdef REISERFS_PREALLOCATE 588#ifdef REISERFS_PREALLOCATE
589 if (!(flags & GET_BLOCK_NO_ISEM)) { 589 if (!(flags & GET_BLOCK_NO_IMUX)) {
590 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, 590 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
591 path, block); 591 path, block);
592 } 592 }
@@ -2318,7 +2318,7 @@ static int map_block_for_writepage(struct inode *inode,
2318 /* this is where we fill in holes in the file. */ 2318 /* this is where we fill in holes in the file. */
2319 if (use_get_block) { 2319 if (use_get_block) {
2320 retval = reiserfs_get_block(inode, block, bh_result, 2320 retval = reiserfs_get_block(inode, block, bh_result,
2321 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM 2321 GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2322 | GET_BLOCK_NO_DANGLE); 2322 | GET_BLOCK_NO_DANGLE);
2323 if (!retval) { 2323 if (!retval) {
2324 if (!buffer_mapped(bh_result) 2324 if (!buffer_mapped(bh_result)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 81fc00285f60..745c88100895 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -2,6 +2,7 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <linux/capability.h>
5#include <linux/fs.h> 6#include <linux/fs.h>
6#include <linux/reiserfs_fs.h> 7#include <linux/reiserfs_fs.h>
7#include <linux/time.h> 8#include <linux/time.h>
@@ -120,7 +121,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
120 /* we need to make sure nobody is changing the file size beneath 121 /* we need to make sure nobody is changing the file size beneath
121 ** us 122 ** us
122 */ 123 */
123 down(&inode->i_sem); 124 mutex_lock(&inode->i_mutex);
124 125
125 write_from = inode->i_size & (blocksize - 1); 126 write_from = inode->i_size & (blocksize - 1);
126 /* if we are on a block boundary, we are already unpacked. */ 127 /* if we are on a block boundary, we are already unpacked. */
@@ -156,7 +157,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
156 page_cache_release(page); 157 page_cache_release(page);
157 158
158 out: 159 out:
159 up(&inode->i_sem); 160 mutex_unlock(&inode->i_mutex);
160 reiserfs_write_unlock(inode->i_sb); 161 reiserfs_write_unlock(inode->i_sb);
161 return retval; 162 return retval;
162} 163}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3f17ef844fb6..4491fcf2a0e6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -3925,10 +3925,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3925 flush = 1; 3925 flush = 1;
3926 } 3926 }
3927#ifdef REISERFS_PREALLOCATE 3927#ifdef REISERFS_PREALLOCATE
3928 /* quota ops might need to nest, setup the journal_info pointer for them */ 3928 /* quota ops might need to nest, setup the journal_info pointer for them
3929 * and raise the refcount so that it is > 0. */
3929 current->journal_info = th; 3930 current->journal_info = th;
3931 th->t_refcount++;
3930 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 3932 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
3931 * the transaction */ 3933 * the transaction */
3934 th->t_refcount--;
3932 current->journal_info = th->t_handle_save; 3935 current->journal_info = th->t_handle_save;
3933#endif 3936#endif
3934 3937
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 3549067c42d9..8f8d8d01107c 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -375,11 +375,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
375 return ERR_PTR(-EIO); 375 return ERR_PTR(-EIO);
376 } 376 }
377 377
378 if (inode) 378 return d_splice_alias(inode, dentry);
379 return d_splice_alias(inode, dentry);
380
381 d_add(dentry, inode);
382 return NULL;
383} 379}
384 380
385/* 381/*
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 42afb5bef111..397d9590c8f2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2211,7 +2211,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2211 size_t towrite = len; 2211 size_t towrite = len;
2212 struct buffer_head tmp_bh, *bh; 2212 struct buffer_head tmp_bh, *bh;
2213 2213
2214 down(&inode->i_sem); 2214 mutex_lock(&inode->i_mutex);
2215 while (towrite > 0) { 2215 while (towrite > 0) {
2216 tocopy = sb->s_blocksize - offset < towrite ? 2216 tocopy = sb->s_blocksize - offset < towrite ?
2217 sb->s_blocksize - offset : towrite; 2217 sb->s_blocksize - offset : towrite;
@@ -2250,7 +2250,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2250 inode->i_version++; 2250 inode->i_version++;
2251 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2251 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2252 mark_inode_dirty(inode); 2252 mark_inode_dirty(inode);
2253 up(&inode->i_sem); 2253 mutex_unlock(&inode->i_mutex);
2254 return len - towrite; 2254 return len - towrite;
2255} 2255}
2256 2256
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index c92e124f628e..196e971c03c9 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -205,7 +205,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
205 1) * p_s_sb->s_blocksize; 205 1) * p_s_sb->s_blocksize;
206 pos1 = pos; 206 pos1 = pos;
207 207
208 // we are protected by i_sem. The tail can not disapper, not 208 // we are protected by i_mutex. The tail can not disapper, not
209 // append can be done either 209 // append can be done either
210 // we are in truncate or packing tail in file_release 210 // we are in truncate or packing tail in file_release
211 211
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 02091eaac0b4..cc061bfd437b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -30,6 +30,7 @@
30 */ 30 */
31 31
32#include <linux/reiserfs_fs.h> 32#include <linux/reiserfs_fs.h>
33#include <linux/capability.h>
33#include <linux/dcache.h> 34#include <linux/dcache.h>
34#include <linux/namei.h> 35#include <linux/namei.h>
35#include <linux/errno.h> 36#include <linux/errno.h>
@@ -67,11 +68,11 @@ static struct dentry *create_xa_root(struct super_block *sb)
67 goto out; 68 goto out;
68 } else if (!xaroot->d_inode) { 69 } else if (!xaroot->d_inode) {
69 int err; 70 int err;
70 down(&privroot->d_inode->i_sem); 71 mutex_lock(&privroot->d_inode->i_mutex);
71 err = 72 err =
72 privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot, 73 privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot,
73 0700); 74 0700);
74 up(&privroot->d_inode->i_sem); 75 mutex_unlock(&privroot->d_inode->i_mutex);
75 76
76 if (err) { 77 if (err) {
77 dput(xaroot); 78 dput(xaroot);
@@ -219,7 +220,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode,
219 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { 220 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
220 goto out; 221 goto out;
221 } else { 222 } else {
222 /* inode->i_sem is down, so nothing else can try to create 223 /* inode->i_mutex is down, so nothing else can try to create
223 * the same xattr */ 224 * the same xattr */
224 err = xadir->d_inode->i_op->create(xadir->d_inode, xafile, 225 err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
225 0700 | S_IFREG, NULL); 226 0700 | S_IFREG, NULL);
@@ -268,7 +269,7 @@ static struct file *open_xa_file(const struct inode *inode, const char *name,
268 * and don't mess with f->f_pos, but the idea is the same. Do some 269 * and don't mess with f->f_pos, but the idea is the same. Do some
269 * action on each and every entry in the directory. 270 * action on each and every entry in the directory.
270 * 271 *
271 * we're called with i_sem held, so there are no worries about the directory 272 * we're called with i_mutex held, so there are no worries about the directory
272 * changing underneath us. 273 * changing underneath us.
273 */ 274 */
274static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir) 275static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
@@ -426,7 +427,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
426 int res = -ENOTDIR; 427 int res = -ENOTDIR;
427 if (!file->f_op || !file->f_op->readdir) 428 if (!file->f_op || !file->f_op->readdir)
428 goto out; 429 goto out;
429 down(&inode->i_sem); 430 mutex_lock(&inode->i_mutex);
430// down(&inode->i_zombie); 431// down(&inode->i_zombie);
431 res = -ENOENT; 432 res = -ENOENT;
432 if (!IS_DEADDIR(inode)) { 433 if (!IS_DEADDIR(inode)) {
@@ -435,7 +436,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
435 unlock_kernel(); 436 unlock_kernel();
436 } 437 }
437// up(&inode->i_zombie); 438// up(&inode->i_zombie);
438 up(&inode->i_sem); 439 mutex_unlock(&inode->i_mutex);
439 out: 440 out:
440 return res; 441 return res;
441} 442}
@@ -480,7 +481,7 @@ static inline __u32 xattr_hash(const char *msg, int len)
480/* Generic extended attribute operations that can be used by xa plugins */ 481/* Generic extended attribute operations that can be used by xa plugins */
481 482
482/* 483/*
483 * inode->i_sem: down 484 * inode->i_mutex: down
484 */ 485 */
485int 486int
486reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, 487reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
@@ -497,12 +498,6 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
497 struct iattr newattrs; 498 struct iattr newattrs;
498 __u32 xahash = 0; 499 __u32 xahash = 0;
499 500
500 if (IS_RDONLY(inode))
501 return -EROFS;
502
503 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
504 return -EPERM;
505
506 if (get_inode_sd_version(inode) == STAT_DATA_V1) 501 if (get_inode_sd_version(inode) == STAT_DATA_V1)
507 return -EOPNOTSUPP; 502 return -EOPNOTSUPP;
508 503
@@ -535,7 +530,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
535 /* Resize it so we're ok to write there */ 530 /* Resize it so we're ok to write there */
536 newattrs.ia_size = buffer_size; 531 newattrs.ia_size = buffer_size;
537 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 532 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
538 down(&xinode->i_sem); 533 mutex_lock(&xinode->i_mutex);
539 err = notify_change(fp->f_dentry, &newattrs); 534 err = notify_change(fp->f_dentry, &newattrs);
540 if (err) 535 if (err)
541 goto out_filp; 536 goto out_filp;
@@ -598,7 +593,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
598 } 593 }
599 594
600 out_filp: 595 out_filp:
601 up(&xinode->i_sem); 596 mutex_unlock(&xinode->i_mutex);
602 fput(fp); 597 fput(fp);
603 598
604 out: 599 out:
@@ -606,7 +601,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
606} 601}
607 602
608/* 603/*
609 * inode->i_sem: down 604 * inode->i_mutex: down
610 */ 605 */
611int 606int
612reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer, 607reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
@@ -758,9 +753,6 @@ int reiserfs_xattr_del(struct inode *inode, const char *name)
758 struct dentry *dir; 753 struct dentry *dir;
759 int err; 754 int err;
760 755
761 if (IS_RDONLY(inode))
762 return -EROFS;
763
764 dir = open_xa_dir(inode, FL_READONLY); 756 dir = open_xa_dir(inode, FL_READONLY);
765 if (IS_ERR(dir)) { 757 if (IS_ERR(dir)) {
766 err = PTR_ERR(dir); 758 err = PTR_ERR(dir);
@@ -793,7 +785,7 @@ reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
793 785
794} 786}
795 787
796/* This is called w/ inode->i_sem downed */ 788/* This is called w/ inode->i_mutex downed */
797int reiserfs_delete_xattrs(struct inode *inode) 789int reiserfs_delete_xattrs(struct inode *inode)
798{ 790{
799 struct file *fp; 791 struct file *fp;
@@ -946,7 +938,7 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
946 938
947/* 939/*
948 * Inode operation getxattr() 940 * Inode operation getxattr()
949 * Preliminary locking: we down dentry->d_inode->i_sem 941 * Preliminary locking: we down dentry->d_inode->i_mutex
950 */ 942 */
951ssize_t 943ssize_t
952reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 944reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
@@ -970,7 +962,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
970/* 962/*
971 * Inode operation setxattr() 963 * Inode operation setxattr()
972 * 964 *
973 * dentry->d_inode->i_sem down 965 * dentry->d_inode->i_mutex down
974 */ 966 */
975int 967int
976reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 968reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
@@ -984,12 +976,6 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
984 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 976 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
985 return -EOPNOTSUPP; 977 return -EOPNOTSUPP;
986 978
987 if (IS_RDONLY(dentry->d_inode))
988 return -EROFS;
989
990 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
991 return -EROFS;
992
993 reiserfs_write_lock_xattr_i(dentry->d_inode); 979 reiserfs_write_lock_xattr_i(dentry->d_inode);
994 lock = !has_xattr_dir(dentry->d_inode); 980 lock = !has_xattr_dir(dentry->d_inode);
995 if (lock) 981 if (lock)
@@ -1008,7 +994,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
1008/* 994/*
1009 * Inode operation removexattr() 995 * Inode operation removexattr()
1010 * 996 *
1011 * dentry->d_inode->i_sem down 997 * dentry->d_inode->i_mutex down
1012 */ 998 */
1013int reiserfs_removexattr(struct dentry *dentry, const char *name) 999int reiserfs_removexattr(struct dentry *dentry, const char *name)
1014{ 1000{
@@ -1019,12 +1005,6 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name)
1019 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 1005 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
1020 return -EOPNOTSUPP; 1006 return -EOPNOTSUPP;
1021 1007
1022 if (IS_RDONLY(dentry->d_inode))
1023 return -EROFS;
1024
1025 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
1026 return -EPERM;
1027
1028 reiserfs_write_lock_xattr_i(dentry->d_inode); 1008 reiserfs_write_lock_xattr_i(dentry->d_inode);
1029 reiserfs_read_lock_xattrs(dentry->d_sb); 1009 reiserfs_read_lock_xattrs(dentry->d_sb);
1030 1010
@@ -1091,7 +1071,7 @@ reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
1091/* 1071/*
1092 * Inode operation listxattr() 1072 * Inode operation listxattr()
1093 * 1073 *
1094 * Preliminary locking: we down dentry->d_inode->i_sem 1074 * Preliminary locking: we down dentry->d_inode->i_mutex
1095 */ 1075 */
1096ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) 1076ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
1097{ 1077{
@@ -1289,9 +1269,9 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1289 if (!IS_ERR(dentry)) { 1269 if (!IS_ERR(dentry)) {
1290 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { 1270 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
1291 struct inode *inode = dentry->d_parent->d_inode; 1271 struct inode *inode = dentry->d_parent->d_inode;
1292 down(&inode->i_sem); 1272 mutex_lock(&inode->i_mutex);
1293 err = inode->i_op->mkdir(inode, dentry, 0700); 1273 err = inode->i_op->mkdir(inode, dentry, 0700);
1294 up(&inode->i_sem); 1274 mutex_unlock(&inode->i_mutex);
1295 if (err) { 1275 if (err) {
1296 dput(dentry); 1276 dput(dentry);
1297 dentry = NULL; 1277 dentry = NULL;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a47ac9aac8b2..43de3ba83332 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -1,3 +1,4 @@
1#include <linux/capability.h>
1#include <linux/fs.h> 2#include <linux/fs.h>
2#include <linux/posix_acl.h> 3#include <linux/posix_acl.h>
3#include <linux/reiserfs_fs.h> 4#include <linux/reiserfs_fs.h>
@@ -174,7 +175,7 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
174/* 175/*
175 * Inode operation get_posix_acl(). 176 * Inode operation get_posix_acl().
176 * 177 *
177 * inode->i_sem: down 178 * inode->i_mutex: down
178 * BKL held [before 2.5.x] 179 * BKL held [before 2.5.x]
179 */ 180 */
180struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) 181struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
@@ -237,7 +238,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
237/* 238/*
238 * Inode operation set_posix_acl(). 239 * Inode operation set_posix_acl().
239 * 240 *
240 * inode->i_sem: down 241 * inode->i_mutex: down
241 * BKL held [before 2.5.x] 242 * BKL held [before 2.5.x]
242 */ 243 */
243static int 244static int
@@ -312,7 +313,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
312 return error; 313 return error;
313} 314}
314 315
315/* dir->i_sem: down, 316/* dir->i_mutex: locked,
316 * inode is new and not released into the wild yet */ 317 * inode is new and not released into the wild yet */
317int 318int
318reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, 319reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 2501f7e66ab9..024a938ca60f 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -1,4 +1,5 @@
1#include <linux/reiserfs_fs.h> 1#include <linux/reiserfs_fs.h>
2#include <linux/capability.h>
2#include <linux/errno.h> 3#include <linux/errno.h>
3#include <linux/fs.h> 4#include <linux/fs.h>
4#include <linux/pagemap.h> 5#include <linux/pagemap.h>
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 51458048ca66..073f39364b11 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -16,18 +16,10 @@ static int
16user_get(struct inode *inode, const char *name, void *buffer, size_t size) 16user_get(struct inode *inode, const char *name, void *buffer, size_t size)
17{ 17{
18 18
19 int error;
20
21 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 19 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
22 return -EINVAL; 20 return -EINVAL;
23
24 if (!reiserfs_xattrs_user(inode->i_sb)) 21 if (!reiserfs_xattrs_user(inode->i_sb))
25 return -EOPNOTSUPP; 22 return -EOPNOTSUPP;
26
27 error = reiserfs_permission_locked(inode, MAY_READ, NULL);
28 if (error)
29 return error;
30
31 return reiserfs_xattr_get(inode, name, buffer, size); 23 return reiserfs_xattr_get(inode, name, buffer, size);
32} 24}
33 25
@@ -36,43 +28,21 @@ user_set(struct inode *inode, const char *name, const void *buffer,
36 size_t size, int flags) 28 size_t size, int flags)
37{ 29{
38 30
39 int error;
40
41 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 31 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
42 return -EINVAL; 32 return -EINVAL;
43 33
44 if (!reiserfs_xattrs_user(inode->i_sb)) 34 if (!reiserfs_xattrs_user(inode->i_sb))
45 return -EOPNOTSUPP; 35 return -EOPNOTSUPP;
46
47 if (!S_ISREG(inode->i_mode) &&
48 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
49 return -EPERM;
50
51 error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
52 if (error)
53 return error;
54
55 return reiserfs_xattr_set(inode, name, buffer, size, flags); 36 return reiserfs_xattr_set(inode, name, buffer, size, flags);
56} 37}
57 38
58static int user_del(struct inode *inode, const char *name) 39static int user_del(struct inode *inode, const char *name)
59{ 40{
60 int error;
61
62 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 41 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
63 return -EINVAL; 42 return -EINVAL;
64 43
65 if (!reiserfs_xattrs_user(inode->i_sb)) 44 if (!reiserfs_xattrs_user(inode->i_sb))
66 return -EOPNOTSUPP; 45 return -EOPNOTSUPP;
67
68 if (!S_ISREG(inode->i_mode) &&
69 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
70 return -EPERM;
71
72 error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
73 if (error)
74 return error;
75
76 return 0; 46 return 0;
77} 47}
78 48
diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c
index 84e21ffa5ca8..10187812771e 100644
--- a/fs/relayfs/buffers.c
+++ b/fs/relayfs/buffers.c
@@ -185,5 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf)
185void relay_remove_buf(struct kref *kref) 185void relay_remove_buf(struct kref *kref)
186{ 186{
187 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); 187 struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
188 relayfs_remove(buf->dentry); 188 buf->chan->cb->remove_buf_file(buf->dentry);
189 relay_destroy_buf(buf);
189} 190}
diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c
index 0f7f88d067ad..383523011aad 100644
--- a/fs/relayfs/inode.c
+++ b/fs/relayfs/inode.c
@@ -26,31 +26,22 @@
26 26
27static struct vfsmount * relayfs_mount; 27static struct vfsmount * relayfs_mount;
28static int relayfs_mount_count; 28static int relayfs_mount_count;
29static kmem_cache_t * relayfs_inode_cachep;
30 29
31static struct backing_dev_info relayfs_backing_dev_info = { 30static struct backing_dev_info relayfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */ 31 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 32 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
34}; 33};
35 34
36static struct inode *relayfs_get_inode(struct super_block *sb, int mode, 35static struct inode *relayfs_get_inode(struct super_block *sb,
37 struct rchan *chan) 36 int mode,
37 struct file_operations *fops,
38 void *data)
38{ 39{
39 struct rchan_buf *buf = NULL;
40 struct inode *inode; 40 struct inode *inode;
41 41
42 if (S_ISREG(mode)) {
43 BUG_ON(!chan);
44 buf = relay_create_buf(chan);
45 if (!buf)
46 return NULL;
47 }
48
49 inode = new_inode(sb); 42 inode = new_inode(sb);
50 if (!inode) { 43 if (!inode)
51 relay_destroy_buf(buf);
52 return NULL; 44 return NULL;
53 }
54 45
55 inode->i_mode = mode; 46 inode->i_mode = mode;
56 inode->i_uid = 0; 47 inode->i_uid = 0;
@@ -61,8 +52,9 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
61 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 52 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
62 switch (mode & S_IFMT) { 53 switch (mode & S_IFMT) {
63 case S_IFREG: 54 case S_IFREG:
64 inode->i_fop = &relayfs_file_operations; 55 inode->i_fop = fops;
65 RELAYFS_I(inode)->buf = buf; 56 if (data)
57 inode->u.generic_ip = data;
66 break; 58 break;
67 case S_IFDIR: 59 case S_IFDIR:
68 inode->i_op = &simple_dir_inode_operations; 60 inode->i_op = &simple_dir_inode_operations;
@@ -83,7 +75,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
83 * @name: the name of the file to create 75 * @name: the name of the file to create
84 * @parent: parent directory 76 * @parent: parent directory
85 * @mode: mode 77 * @mode: mode
86 * @chan: relay channel associated with the file 78 * @fops: file operations to use for the file
79 * @data: user-associated data for this file
87 * 80 *
88 * Returns the new dentry, NULL on failure 81 * Returns the new dentry, NULL on failure
89 * 82 *
@@ -92,7 +85,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
92static struct dentry *relayfs_create_entry(const char *name, 85static struct dentry *relayfs_create_entry(const char *name,
93 struct dentry *parent, 86 struct dentry *parent,
94 int mode, 87 int mode,
95 struct rchan *chan) 88 struct file_operations *fops,
89 void *data)
96{ 90{
97 struct dentry *d; 91 struct dentry *d;
98 struct inode *inode; 92 struct inode *inode;
@@ -115,7 +109,7 @@ static struct dentry *relayfs_create_entry(const char *name,
115 } 109 }
116 110
117 parent = dget(parent); 111 parent = dget(parent);
118 down(&parent->d_inode->i_sem); 112 mutex_lock(&parent->d_inode->i_mutex);
119 d = lookup_one_len(name, parent, strlen(name)); 113 d = lookup_one_len(name, parent, strlen(name));
120 if (IS_ERR(d)) { 114 if (IS_ERR(d)) {
121 d = NULL; 115 d = NULL;
@@ -127,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name,
127 goto release_mount; 121 goto release_mount;
128 } 122 }
129 123
130 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan); 124 inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data);
131 if (!inode) { 125 if (!inode) {
132 d = NULL; 126 d = NULL;
133 goto release_mount; 127 goto release_mount;
@@ -145,7 +139,7 @@ release_mount:
145 simple_release_fs(&relayfs_mount, &relayfs_mount_count); 139 simple_release_fs(&relayfs_mount, &relayfs_mount_count);
146 140
147exit: 141exit:
148 up(&parent->d_inode->i_sem); 142 mutex_unlock(&parent->d_inode->i_mutex);
149 dput(parent); 143 dput(parent);
150 return d; 144 return d;
151} 145}
@@ -155,20 +149,26 @@ exit:
155 * @name: the name of the file to create 149 * @name: the name of the file to create
156 * @parent: parent directory 150 * @parent: parent directory
157 * @mode: mode, if not specied the default perms are used 151 * @mode: mode, if not specied the default perms are used
158 * @chan: channel associated with the file 152 * @fops: file operations to use for the file
153 * @data: user-associated data for this file
159 * 154 *
160 * Returns file dentry if successful, NULL otherwise. 155 * Returns file dentry if successful, NULL otherwise.
161 * 156 *
162 * The file will be created user r on behalf of current user. 157 * The file will be created user r on behalf of current user.
163 */ 158 */
164struct dentry *relayfs_create_file(const char *name, struct dentry *parent, 159struct dentry *relayfs_create_file(const char *name,
165 int mode, struct rchan *chan) 160 struct dentry *parent,
161 int mode,
162 struct file_operations *fops,
163 void *data)
166{ 164{
165 BUG_ON(!fops);
166
167 if (!mode) 167 if (!mode)
168 mode = S_IRUSR; 168 mode = S_IRUSR;
169 mode = (mode & S_IALLUGO) | S_IFREG; 169 mode = (mode & S_IALLUGO) | S_IFREG;
170 170
171 return relayfs_create_entry(name, parent, mode, chan); 171 return relayfs_create_entry(name, parent, mode, fops, data);
172} 172}
173 173
174/** 174/**
@@ -183,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) 183struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
184{ 184{
185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 185 int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
186 return relayfs_create_entry(name, parent, mode, NULL); 186 return relayfs_create_entry(name, parent, mode, NULL, NULL);
187} 187}
188 188
189/** 189/**
@@ -204,7 +204,7 @@ int relayfs_remove(struct dentry *dentry)
204 return -EINVAL; 204 return -EINVAL;
205 205
206 parent = dget(parent); 206 parent = dget(parent);
207 down(&parent->d_inode->i_sem); 207 mutex_lock(&parent->d_inode->i_mutex);
208 if (dentry->d_inode) { 208 if (dentry->d_inode) {
209 if (S_ISDIR(dentry->d_inode->i_mode)) 209 if (S_ISDIR(dentry->d_inode->i_mode))
210 error = simple_rmdir(parent->d_inode, dentry); 210 error = simple_rmdir(parent->d_inode, dentry);
@@ -215,7 +215,7 @@ int relayfs_remove(struct dentry *dentry)
215 } 215 }
216 if (!error) 216 if (!error)
217 dput(dentry); 217 dput(dentry);
218 up(&parent->d_inode->i_sem); 218 mutex_unlock(&parent->d_inode->i_mutex);
219 dput(parent); 219 dput(parent);
220 220
221 if (!error) 221 if (!error)
@@ -225,6 +225,17 @@ int relayfs_remove(struct dentry *dentry)
225} 225}
226 226
227/** 227/**
228 * relayfs_remove_file - remove a file from relay filesystem
229 * @dentry: directory dentry
230 *
231 * Returns 0 if successful, negative otherwise.
232 */
233int relayfs_remove_file(struct dentry *dentry)
234{
235 return relayfs_remove(dentry);
236}
237
238/**
228 * relayfs_remove_dir - remove a directory in the relay filesystem 239 * relayfs_remove_dir - remove a directory in the relay filesystem
229 * @dentry: directory dentry 240 * @dentry: directory dentry
230 * 241 *
@@ -236,45 +247,45 @@ int relayfs_remove_dir(struct dentry *dentry)
236} 247}
237 248
238/** 249/**
239 * relayfs_open - open file op for relayfs files 250 * relay_file_open - open file op for relay files
240 * @inode: the inode 251 * @inode: the inode
241 * @filp: the file 252 * @filp: the file
242 * 253 *
243 * Increments the channel buffer refcount. 254 * Increments the channel buffer refcount.
244 */ 255 */
245static int relayfs_open(struct inode *inode, struct file *filp) 256static int relay_file_open(struct inode *inode, struct file *filp)
246{ 257{
247 struct rchan_buf *buf = RELAYFS_I(inode)->buf; 258 struct rchan_buf *buf = inode->u.generic_ip;
248 kref_get(&buf->kref); 259 kref_get(&buf->kref);
260 filp->private_data = buf;
249 261
250 return 0; 262 return 0;
251} 263}
252 264
253/** 265/**
254 * relayfs_mmap - mmap file op for relayfs files 266 * relay_file_mmap - mmap file op for relay files
255 * @filp: the file 267 * @filp: the file
256 * @vma: the vma describing what to map 268 * @vma: the vma describing what to map
257 * 269 *
258 * Calls upon relay_mmap_buf to map the file into user space. 270 * Calls upon relay_mmap_buf to map the file into user space.
259 */ 271 */
260static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) 272static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
261{ 273{
262 struct inode *inode = filp->f_dentry->d_inode; 274 struct rchan_buf *buf = filp->private_data;
263 return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); 275 return relay_mmap_buf(buf, vma);
264} 276}
265 277
266/** 278/**
267 * relayfs_poll - poll file op for relayfs files 279 * relay_file_poll - poll file op for relay files
268 * @filp: the file 280 * @filp: the file
269 * @wait: poll table 281 * @wait: poll table
270 * 282 *
271 * Poll implemention. 283 * Poll implemention.
272 */ 284 */
273static unsigned int relayfs_poll(struct file *filp, poll_table *wait) 285static unsigned int relay_file_poll(struct file *filp, poll_table *wait)
274{ 286{
275 unsigned int mask = 0; 287 unsigned int mask = 0;
276 struct inode *inode = filp->f_dentry->d_inode; 288 struct rchan_buf *buf = filp->private_data;
277 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
278 289
279 if (buf->finalized) 290 if (buf->finalized)
280 return POLLERR; 291 return POLLERR;
@@ -289,27 +300,27 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
289} 300}
290 301
291/** 302/**
292 * relayfs_release - release file op for relayfs files 303 * relay_file_release - release file op for relay files
293 * @inode: the inode 304 * @inode: the inode
294 * @filp: the file 305 * @filp: the file
295 * 306 *
296 * Decrements the channel refcount, as the filesystem is 307 * Decrements the channel refcount, as the filesystem is
297 * no longer using it. 308 * no longer using it.
298 */ 309 */
299static int relayfs_release(struct inode *inode, struct file *filp) 310static int relay_file_release(struct inode *inode, struct file *filp)
300{ 311{
301 struct rchan_buf *buf = RELAYFS_I(inode)->buf; 312 struct rchan_buf *buf = filp->private_data;
302 kref_put(&buf->kref, relay_remove_buf); 313 kref_put(&buf->kref, relay_remove_buf);
303 314
304 return 0; 315 return 0;
305} 316}
306 317
307/** 318/**
308 * relayfs_read_consume - update the consumed count for the buffer 319 * relay_file_read_consume - update the consumed count for the buffer
309 */ 320 */
310static void relayfs_read_consume(struct rchan_buf *buf, 321static void relay_file_read_consume(struct rchan_buf *buf,
311 size_t read_pos, 322 size_t read_pos,
312 size_t bytes_consumed) 323 size_t bytes_consumed)
313{ 324{
314 size_t subbuf_size = buf->chan->subbuf_size; 325 size_t subbuf_size = buf->chan->subbuf_size;
315 size_t n_subbufs = buf->chan->n_subbufs; 326 size_t n_subbufs = buf->chan->n_subbufs;
@@ -332,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf,
332} 343}
333 344
334/** 345/**
335 * relayfs_read_avail - boolean, are there unconsumed bytes available? 346 * relay_file_read_avail - boolean, are there unconsumed bytes available?
336 */ 347 */
337static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) 348static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
338{ 349{
339 size_t bytes_produced, bytes_consumed, write_offset; 350 size_t bytes_produced, bytes_consumed, write_offset;
340 size_t subbuf_size = buf->chan->subbuf_size; 351 size_t subbuf_size = buf->chan->subbuf_size;
@@ -365,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
365 if (bytes_produced == bytes_consumed) 376 if (bytes_produced == bytes_consumed)
366 return 0; 377 return 0;
367 378
368 relayfs_read_consume(buf, read_pos, 0); 379 relay_file_read_consume(buf, read_pos, 0);
369 380
370 return 1; 381 return 1;
371} 382}
372 383
373/** 384/**
374 * relayfs_read_subbuf_avail - return bytes available in sub-buffer 385 * relay_file_read_subbuf_avail - return bytes available in sub-buffer
375 */ 386 */
376static size_t relayfs_read_subbuf_avail(size_t read_pos, 387static size_t relay_file_read_subbuf_avail(size_t read_pos,
377 struct rchan_buf *buf) 388 struct rchan_buf *buf)
378{ 389{
379 size_t padding, avail = 0; 390 size_t padding, avail = 0;
380 size_t read_subbuf, read_offset, write_subbuf, write_offset; 391 size_t read_subbuf, read_offset, write_subbuf, write_offset;
@@ -396,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos,
396} 407}
397 408
398/** 409/**
399 * relayfs_read_start_pos - find the first available byte to read 410 * relay_file_read_start_pos - find the first available byte to read
400 * 411 *
401 * If the read_pos is in the middle of padding, return the 412 * If the read_pos is in the middle of padding, return the
402 * position of the first actually available byte, otherwise 413 * position of the first actually available byte, otherwise
403 * return the original value. 414 * return the original value.
404 */ 415 */
405static size_t relayfs_read_start_pos(size_t read_pos, 416static size_t relay_file_read_start_pos(size_t read_pos,
406 struct rchan_buf *buf) 417 struct rchan_buf *buf)
407{ 418{
408 size_t read_subbuf, padding, padding_start, padding_end; 419 size_t read_subbuf, padding, padding_start, padding_end;
409 size_t subbuf_size = buf->chan->subbuf_size; 420 size_t subbuf_size = buf->chan->subbuf_size;
@@ -422,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos,
422} 433}
423 434
424/** 435/**
425 * relayfs_read_end_pos - return the new read position 436 * relay_file_read_end_pos - return the new read position
426 */ 437 */
427static size_t relayfs_read_end_pos(struct rchan_buf *buf, 438static size_t relay_file_read_end_pos(struct rchan_buf *buf,
428 size_t read_pos, 439 size_t read_pos,
429 size_t count) 440 size_t count)
430{ 441{
431 size_t read_subbuf, padding, end_pos; 442 size_t read_subbuf, padding, end_pos;
432 size_t subbuf_size = buf->chan->subbuf_size; 443 size_t subbuf_size = buf->chan->subbuf_size;
@@ -445,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
445} 456}
446 457
447/** 458/**
448 * relayfs_read - read file op for relayfs files 459 * relay_file_read - read file op for relay files
449 * @filp: the file 460 * @filp: the file
450 * @buffer: the userspace buffer 461 * @buffer: the userspace buffer
451 * @count: number of bytes to read 462 * @count: number of bytes to read
@@ -454,23 +465,23 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
454 * Reads count bytes or the number of bytes available in the 465 * Reads count bytes or the number of bytes available in the
455 * current sub-buffer being read, whichever is smaller. 466 * current sub-buffer being read, whichever is smaller.
456 */ 467 */
457static ssize_t relayfs_read(struct file *filp, 468static ssize_t relay_file_read(struct file *filp,
458 char __user *buffer, 469 char __user *buffer,
459 size_t count, 470 size_t count,
460 loff_t *ppos) 471 loff_t *ppos)
461{ 472{
473 struct rchan_buf *buf = filp->private_data;
462 struct inode *inode = filp->f_dentry->d_inode; 474 struct inode *inode = filp->f_dentry->d_inode;
463 struct rchan_buf *buf = RELAYFS_I(inode)->buf;
464 size_t read_start, avail; 475 size_t read_start, avail;
465 ssize_t ret = 0; 476 ssize_t ret = 0;
466 void *from; 477 void *from;
467 478
468 down(&inode->i_sem); 479 mutex_lock(&inode->i_mutex);
469 if(!relayfs_read_avail(buf, *ppos)) 480 if(!relay_file_read_avail(buf, *ppos))
470 goto out; 481 goto out;
471 482
472 read_start = relayfs_read_start_pos(*ppos, buf); 483 read_start = relay_file_read_start_pos(*ppos, buf);
473 avail = relayfs_read_subbuf_avail(read_start, buf); 484 avail = relay_file_read_subbuf_avail(read_start, buf);
474 if (!avail) 485 if (!avail)
475 goto out; 486 goto out;
476 487
@@ -480,58 +491,25 @@ static ssize_t relayfs_read(struct file *filp,
480 ret = -EFAULT; 491 ret = -EFAULT;
481 goto out; 492 goto out;
482 } 493 }
483 relayfs_read_consume(buf, read_start, count); 494 relay_file_read_consume(buf, read_start, count);
484 *ppos = relayfs_read_end_pos(buf, read_start, count); 495 *ppos = relay_file_read_end_pos(buf, read_start, count);
485out: 496out:
486 up(&inode->i_sem); 497 mutex_unlock(&inode->i_mutex);
487 return ret; 498 return ret;
488} 499}
489 500
490/** 501struct file_operations relay_file_operations = {
491 * relayfs alloc_inode() implementation 502 .open = relay_file_open,
492 */ 503 .poll = relay_file_poll,
493static struct inode *relayfs_alloc_inode(struct super_block *sb) 504 .mmap = relay_file_mmap,
494{ 505 .read = relay_file_read,
495 struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
496 if (!p)
497 return NULL;
498 p->buf = NULL;
499
500 return &p->vfs_inode;
501}
502
503/**
504 * relayfs destroy_inode() implementation
505 */
506static void relayfs_destroy_inode(struct inode *inode)
507{
508 if (RELAYFS_I(inode)->buf)
509 relay_destroy_buf(RELAYFS_I(inode)->buf);
510
511 kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
512}
513
514static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
515{
516 struct relayfs_inode_info *i = p;
517 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
518 inode_init_once(&i->vfs_inode);
519}
520
521struct file_operations relayfs_file_operations = {
522 .open = relayfs_open,
523 .poll = relayfs_poll,
524 .mmap = relayfs_mmap,
525 .read = relayfs_read,
526 .llseek = no_llseek, 506 .llseek = no_llseek,
527 .release = relayfs_release, 507 .release = relay_file_release,
528}; 508};
529 509
530static struct super_operations relayfs_ops = { 510static struct super_operations relayfs_ops = {
531 .statfs = simple_statfs, 511 .statfs = simple_statfs,
532 .drop_inode = generic_delete_inode, 512 .drop_inode = generic_delete_inode,
533 .alloc_inode = relayfs_alloc_inode,
534 .destroy_inode = relayfs_destroy_inode,
535}; 513};
536 514
537static int relayfs_fill_super(struct super_block * sb, void * data, int silent) 515static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
@@ -544,7 +522,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
544 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 522 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
545 sb->s_magic = RELAYFS_MAGIC; 523 sb->s_magic = RELAYFS_MAGIC;
546 sb->s_op = &relayfs_ops; 524 sb->s_op = &relayfs_ops;
547 inode = relayfs_get_inode(sb, mode, NULL); 525 inode = relayfs_get_inode(sb, mode, NULL, NULL);
548 526
549 if (!inode) 527 if (!inode)
550 return -ENOMEM; 528 return -ENOMEM;
@@ -575,33 +553,27 @@ static struct file_system_type relayfs_fs_type = {
575 553
576static int __init init_relayfs_fs(void) 554static int __init init_relayfs_fs(void)
577{ 555{
578 int err; 556 return register_filesystem(&relayfs_fs_type);
579
580 relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
581 sizeof(struct relayfs_inode_info), 0,
582 0, init_once, NULL);
583 if (!relayfs_inode_cachep)
584 return -ENOMEM;
585
586 err = register_filesystem(&relayfs_fs_type);
587 if (err)
588 kmem_cache_destroy(relayfs_inode_cachep);
589
590 return err;
591} 557}
592 558
593static void __exit exit_relayfs_fs(void) 559static void __exit exit_relayfs_fs(void)
594{ 560{
561
562
563
564
565
595 unregister_filesystem(&relayfs_fs_type); 566 unregister_filesystem(&relayfs_fs_type);
596 kmem_cache_destroy(relayfs_inode_cachep);
597} 567}
598 568
599module_init(init_relayfs_fs) 569module_init(init_relayfs_fs)
600module_exit(exit_relayfs_fs) 570module_exit(exit_relayfs_fs)
601 571
602EXPORT_SYMBOL_GPL(relayfs_file_operations); 572EXPORT_SYMBOL_GPL(relay_file_operations);
603EXPORT_SYMBOL_GPL(relayfs_create_dir); 573EXPORT_SYMBOL_GPL(relayfs_create_dir);
604EXPORT_SYMBOL_GPL(relayfs_remove_dir); 574EXPORT_SYMBOL_GPL(relayfs_remove_dir);
575EXPORT_SYMBOL_GPL(relayfs_create_file);
576EXPORT_SYMBOL_GPL(relayfs_remove_file);
605 577
606MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>"); 578MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
607MODULE_DESCRIPTION("Relay Filesystem"); 579MODULE_DESCRIPTION("Relay Filesystem");
diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
index 2a6f7f12b7f9..abf3ceaace49 100644
--- a/fs/relayfs/relay.c
+++ b/fs/relayfs/relay.c
@@ -80,11 +80,34 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf,
80{ 80{
81} 81}
82 82
83/*
84 * create_buf_file_create() default callback. Creates file to represent buf.
85 */
86static struct dentry *create_buf_file_default_callback(const char *filename,
87 struct dentry *parent,
88 int mode,
89 struct rchan_buf *buf,
90 int *is_global)
91{
92 return relayfs_create_file(filename, parent, mode,
93 &relay_file_operations, buf);
94}
95
96/*
97 * remove_buf_file() default callback. Removes file representing relay buffer.
98 */
99static int remove_buf_file_default_callback(struct dentry *dentry)
100{
101 return relayfs_remove(dentry);
102}
103
83/* relay channel default callbacks */ 104/* relay channel default callbacks */
84static struct rchan_callbacks default_channel_callbacks = { 105static struct rchan_callbacks default_channel_callbacks = {
85 .subbuf_start = subbuf_start_default_callback, 106 .subbuf_start = subbuf_start_default_callback,
86 .buf_mapped = buf_mapped_default_callback, 107 .buf_mapped = buf_mapped_default_callback,
87 .buf_unmapped = buf_unmapped_default_callback, 108 .buf_unmapped = buf_unmapped_default_callback,
109 .create_buf_file = create_buf_file_default_callback,
110 .remove_buf_file = remove_buf_file_default_callback,
88}; 111};
89 112
90/** 113/**
@@ -148,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
148void relay_reset(struct rchan *chan) 171void relay_reset(struct rchan *chan)
149{ 172{
150 unsigned int i; 173 unsigned int i;
174 struct rchan_buf *prev = NULL;
151 175
152 if (!chan) 176 if (!chan)
153 return; 177 return;
154 178
155 for (i = 0; i < NR_CPUS; i++) { 179 for (i = 0; i < NR_CPUS; i++) {
156 if (!chan->buf[i]) 180 if (!chan->buf[i] || chan->buf[i] == prev)
157 continue; 181 break;
158 __relay_reset(chan->buf[i], 0); 182 __relay_reset(chan->buf[i], 0);
183 prev = chan->buf[i];
159 } 184 }
160} 185}
161 186
@@ -166,17 +191,27 @@ void relay_reset(struct rchan *chan)
166 */ 191 */
167static struct rchan_buf *relay_open_buf(struct rchan *chan, 192static struct rchan_buf *relay_open_buf(struct rchan *chan,
168 const char *filename, 193 const char *filename,
169 struct dentry *parent) 194 struct dentry *parent,
195 int *is_global)
170{ 196{
171 struct rchan_buf *buf; 197 struct rchan_buf *buf;
172 struct dentry *dentry; 198 struct dentry *dentry;
173 199
200 if (*is_global)
201 return chan->buf[0];
202
203 buf = relay_create_buf(chan);
204 if (!buf)
205 return NULL;
206
174 /* Create file in fs */ 207 /* Create file in fs */
175 dentry = relayfs_create_file(filename, parent, S_IRUSR, chan); 208 dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
176 if (!dentry) 209 buf, is_global);
210 if (!dentry) {
211 relay_destroy_buf(buf);
177 return NULL; 212 return NULL;
213 }
178 214
179 buf = RELAYFS_I(dentry->d_inode)->buf;
180 buf->dentry = dentry; 215 buf->dentry = dentry;
181 __relay_reset(buf, 1); 216 __relay_reset(buf, 1);
182 217
@@ -214,6 +249,10 @@ static inline void setup_callbacks(struct rchan *chan,
214 cb->buf_mapped = buf_mapped_default_callback; 249 cb->buf_mapped = buf_mapped_default_callback;
215 if (!cb->buf_unmapped) 250 if (!cb->buf_unmapped)
216 cb->buf_unmapped = buf_unmapped_default_callback; 251 cb->buf_unmapped = buf_unmapped_default_callback;
252 if (!cb->create_buf_file)
253 cb->create_buf_file = create_buf_file_default_callback;
254 if (!cb->remove_buf_file)
255 cb->remove_buf_file = remove_buf_file_default_callback;
217 chan->cb = cb; 256 chan->cb = cb;
218} 257}
219 258
@@ -241,6 +280,7 @@ struct rchan *relay_open(const char *base_filename,
241 unsigned int i; 280 unsigned int i;
242 struct rchan *chan; 281 struct rchan *chan;
243 char *tmpname; 282 char *tmpname;
283 int is_global = 0;
244 284
245 if (!base_filename) 285 if (!base_filename)
246 return NULL; 286 return NULL;
@@ -265,7 +305,8 @@ struct rchan *relay_open(const char *base_filename,
265 305
266 for_each_online_cpu(i) { 306 for_each_online_cpu(i) {
267 sprintf(tmpname, "%s%d", base_filename, i); 307 sprintf(tmpname, "%s%d", base_filename, i);
268 chan->buf[i] = relay_open_buf(chan, tmpname, parent); 308 chan->buf[i] = relay_open_buf(chan, tmpname, parent,
309 &is_global);
269 chan->buf[i]->cpu = i; 310 chan->buf[i]->cpu = i;
270 if (!chan->buf[i]) 311 if (!chan->buf[i])
271 goto free_bufs; 312 goto free_bufs;
@@ -279,6 +320,8 @@ free_bufs:
279 if (!chan->buf[i]) 320 if (!chan->buf[i])
280 break; 321 break;
281 relay_close_buf(chan->buf[i]); 322 relay_close_buf(chan->buf[i]);
323 if (is_global)
324 break;
282 } 325 }
283 kfree(tmpname); 326 kfree(tmpname);
284 327
@@ -388,14 +431,16 @@ void relay_destroy_channel(struct kref *kref)
388void relay_close(struct rchan *chan) 431void relay_close(struct rchan *chan)
389{ 432{
390 unsigned int i; 433 unsigned int i;
434 struct rchan_buf *prev = NULL;
391 435
392 if (!chan) 436 if (!chan)
393 return; 437 return;
394 438
395 for (i = 0; i < NR_CPUS; i++) { 439 for (i = 0; i < NR_CPUS; i++) {
396 if (!chan->buf[i]) 440 if (!chan->buf[i] || chan->buf[i] == prev)
397 continue; 441 break;
398 relay_close_buf(chan->buf[i]); 442 relay_close_buf(chan->buf[i]);
443 prev = chan->buf[i];
399 } 444 }
400 445
401 if (chan->last_toobig) 446 if (chan->last_toobig)
@@ -415,14 +460,16 @@ void relay_close(struct rchan *chan)
415void relay_flush(struct rchan *chan) 460void relay_flush(struct rchan *chan)
416{ 461{
417 unsigned int i; 462 unsigned int i;
463 struct rchan_buf *prev = NULL;
418 464
419 if (!chan) 465 if (!chan)
420 return; 466 return;
421 467
422 for (i = 0; i < NR_CPUS; i++) { 468 for (i = 0; i < NR_CPUS; i++) {
423 if (!chan->buf[i]) 469 if (!chan->buf[i] || chan->buf[i] == prev)
424 continue; 470 break;
425 relay_switch_subbuf(chan->buf[i], 0); 471 relay_switch_subbuf(chan->buf[i], 0);
472 prev = chan->buf[i];
426 } 473 }
427} 474}
428 475
diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h
index 703503fa22b6..0993d3e5753b 100644
--- a/fs/relayfs/relay.h
+++ b/fs/relayfs/relay.h
@@ -1,10 +1,6 @@
1#ifndef _RELAY_H 1#ifndef _RELAY_H
2#define _RELAY_H 2#define _RELAY_H
3 3
4struct dentry *relayfs_create_file(const char *name,
5 struct dentry *parent,
6 int mode,
7 struct rchan *chan);
8extern int relayfs_remove(struct dentry *dentry); 4extern int relayfs_remove(struct dentry *dentry);
9extern int relay_buf_empty(struct rchan_buf *buf); 5extern int relay_buf_empty(struct rchan_buf *buf);
10extern void relay_destroy_channel(struct kref *kref); 6extern void relay_destroy_channel(struct kref *kref);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index c74f382dabba..0a13859fd57b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,7 @@ static int
418romfs_readpage(struct file *file, struct page * page) 418romfs_readpage(struct file *file, struct page * page)
419{ 419{
420 struct inode *inode = page->mapping->host; 420 struct inode *inode = page->mapping->host;
421 unsigned long offset, avail, readlen; 421 loff_t offset, avail, readlen;
422 void *buf; 422 void *buf;
423 int result = -EIO; 423 int result = -EIO;
424 424
@@ -429,8 +429,8 @@ romfs_readpage(struct file *file, struct page * page)
429 goto err_out; 429 goto err_out;
430 430
431 /* 32 bit warning -- but not for us :) */ 431 /* 32 bit warning -- but not for us :) */
432 offset = page->index << PAGE_CACHE_SHIFT; 432 offset = page_offset(page);
433 if (offset < inode->i_size) { 433 if (offset < i_size_read(inode)) {
434 avail = inode->i_size-offset; 434 avail = inode->i_size-offset;
435 readlen = min_t(unsigned long, avail, PAGE_SIZE); 435 readlen = min_t(unsigned long, avail, PAGE_SIZE);
436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { 436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
diff --git a/fs/select.c b/fs/select.c
index f10a10317d54..c0f02d36c60e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -179,12 +179,11 @@ get_max:
179#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 179#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
180#define POLLEX_SET (POLLPRI) 180#define POLLEX_SET (POLLPRI)
181 181
182int do_select(int n, fd_set_bits *fds, long *timeout) 182int do_select(int n, fd_set_bits *fds, s64 *timeout)
183{ 183{
184 struct poll_wqueues table; 184 struct poll_wqueues table;
185 poll_table *wait; 185 poll_table *wait;
186 int retval, i; 186 int retval, i;
187 long __timeout = *timeout;
188 187
189 rcu_read_lock(); 188 rcu_read_lock();
190 retval = max_select_fd(n, fds); 189 retval = max_select_fd(n, fds);
@@ -196,11 +195,12 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
196 195
197 poll_initwait(&table); 196 poll_initwait(&table);
198 wait = &table.pt; 197 wait = &table.pt;
199 if (!__timeout) 198 if (!*timeout)
200 wait = NULL; 199 wait = NULL;
201 retval = 0; 200 retval = 0;
202 for (;;) { 201 for (;;) {
203 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 202 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
203 long __timeout;
204 204
205 set_current_state(TASK_INTERRUPTIBLE); 205 set_current_state(TASK_INTERRUPTIBLE);
206 206
@@ -255,22 +255,32 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
255 *rexp = res_ex; 255 *rexp = res_ex;
256 } 256 }
257 wait = NULL; 257 wait = NULL;
258 if (retval || !__timeout || signal_pending(current)) 258 if (retval || !*timeout || signal_pending(current))
259 break; 259 break;
260 if(table.error) { 260 if(table.error) {
261 retval = table.error; 261 retval = table.error;
262 break; 262 break;
263 } 263 }
264
265 if (*timeout < 0) {
266 /* Wait indefinitely */
267 __timeout = MAX_SCHEDULE_TIMEOUT;
268 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
269 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */
270 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
271 *timeout -= __timeout;
272 } else {
273 __timeout = *timeout;
274 *timeout = 0;
275 }
264 __timeout = schedule_timeout(__timeout); 276 __timeout = schedule_timeout(__timeout);
277 if (*timeout >= 0)
278 *timeout += __timeout;
265 } 279 }
266 __set_current_state(TASK_RUNNING); 280 __set_current_state(TASK_RUNNING);
267 281
268 poll_freewait(&table); 282 poll_freewait(&table);
269 283
270 /*
271 * Up-to-date the caller timeout.
272 */
273 *timeout = __timeout;
274 return retval; 284 return retval;
275} 285}
276 286
@@ -295,36 +305,14 @@ static void select_bits_free(void *bits, int size)
295#define MAX_SELECT_SECONDS \ 305#define MAX_SELECT_SECONDS \
296 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 306 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
297 307
298asmlinkage long 308static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
299sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) 309 fd_set __user *exp, s64 *timeout)
300{ 310{
301 fd_set_bits fds; 311 fd_set_bits fds;
302 char *bits; 312 char *bits;
303 long timeout;
304 int ret, size, max_fdset; 313 int ret, size, max_fdset;
305 struct fdtable *fdt; 314 struct fdtable *fdt;
306 315
307 timeout = MAX_SCHEDULE_TIMEOUT;
308 if (tvp) {
309 time_t sec, usec;
310
311 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
312 || __get_user(sec, &tvp->tv_sec)
313 || __get_user(usec, &tvp->tv_usec)) {
314 ret = -EFAULT;
315 goto out_nofds;
316 }
317
318 ret = -EINVAL;
319 if (sec < 0 || usec < 0)
320 goto out_nofds;
321
322 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
323 timeout = ROUND_UP(usec, 1000000/HZ);
324 timeout += sec * (unsigned long) HZ;
325 }
326 }
327
328 ret = -EINVAL; 316 ret = -EINVAL;
329 if (n < 0) 317 if (n < 0)
330 goto out_nofds; 318 goto out_nofds;
@@ -362,18 +350,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
362 zero_fd_set(n, fds.res_out); 350 zero_fd_set(n, fds.res_out);
363 zero_fd_set(n, fds.res_ex); 351 zero_fd_set(n, fds.res_ex);
364 352
365 ret = do_select(n, &fds, &timeout); 353 ret = do_select(n, &fds, timeout);
366
367 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
368 time_t sec = 0, usec = 0;
369 if (timeout) {
370 sec = timeout / HZ;
371 usec = timeout % HZ;
372 usec *= (1000000/HZ);
373 }
374 put_user(sec, &tvp->tv_sec);
375 put_user(usec, &tvp->tv_usec);
376 }
377 354
378 if (ret < 0) 355 if (ret < 0)
379 goto out; 356 goto out;
@@ -395,6 +372,154 @@ out_nofds:
395 return ret; 372 return ret;
396} 373}
397 374
375asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
376 fd_set __user *exp, struct timeval __user *tvp)
377{
378 s64 timeout = -1;
379 struct timeval tv;
380 int ret;
381
382 if (tvp) {
383 if (copy_from_user(&tv, tvp, sizeof(tv)))
384 return -EFAULT;
385
386 if (tv.tv_sec < 0 || tv.tv_usec < 0)
387 return -EINVAL;
388
389 /* Cast to u64 to make GCC stop complaining */
390 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
391 timeout = -1; /* infinite */
392 else {
393 timeout = ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
394 timeout += tv.tv_sec * HZ;
395 }
396 }
397
398 ret = core_sys_select(n, inp, outp, exp, &timeout);
399
400 if (tvp) {
401 if (current->personality & STICKY_TIMEOUTS)
402 goto sticky;
403 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
404 tv.tv_sec = timeout;
405 if (copy_to_user(tvp, &tv, sizeof(tv))) {
406sticky:
407 /*
408 * If an application puts its timeval in read-only
409 * memory, we don't want the Linux-specific update to
410 * the timeval to cause a fault after the select has
411 * completed successfully. However, because we're not
412 * updating the timeval, we can't restart the system
413 * call.
414 */
415 if (ret == -ERESTARTNOHAND)
416 ret = -EINTR;
417 }
418 }
419
420 return ret;
421}
422
423#ifdef TIF_RESTORE_SIGMASK
424asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
425 fd_set __user *exp, struct timespec __user *tsp,
426 const sigset_t __user *sigmask, size_t sigsetsize)
427{
428 s64 timeout = MAX_SCHEDULE_TIMEOUT;
429 sigset_t ksigmask, sigsaved;
430 struct timespec ts;
431 int ret;
432
433 if (tsp) {
434 if (copy_from_user(&ts, tsp, sizeof(ts)))
435 return -EFAULT;
436
437 if (ts.tv_sec < 0 || ts.tv_nsec < 0)
438 return -EINVAL;
439
440 /* Cast to u64 to make GCC stop complaining */
441 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
442 timeout = -1; /* infinite */
443 else {
444 timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
445 timeout += ts.tv_sec * HZ;
446 }
447 }
448
449 if (sigmask) {
450 /* XXX: Don't preclude handling different sized sigset_t's. */
451 if (sigsetsize != sizeof(sigset_t))
452 return -EINVAL;
453 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
454 return -EFAULT;
455
456 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
457 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
458 }
459
460 ret = core_sys_select(n, inp, outp, exp, &timeout);
461
462 if (tsp) {
463 if (current->personality & STICKY_TIMEOUTS)
464 goto sticky;
465 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
466 ts.tv_sec = timeout;
467 if (copy_to_user(tsp, &ts, sizeof(ts))) {
468sticky:
469 /*
470 * If an application puts its timeval in read-only
471 * memory, we don't want the Linux-specific update to
472 * the timeval to cause a fault after the select has
473 * completed successfully. However, because we're not
474 * updating the timeval, we can't restart the system
475 * call.
476 */
477 if (ret == -ERESTARTNOHAND)
478 ret = -EINTR;
479 }
480 }
481
482 if (ret == -ERESTARTNOHAND) {
483 /*
484 * Don't restore the signal mask yet. Let do_signal() deliver
485 * the signal on the way back to userspace, before the signal
486 * mask is restored.
487 */
488 if (sigmask) {
489 memcpy(&current->saved_sigmask, &sigsaved,
490 sizeof(sigsaved));
491 set_thread_flag(TIF_RESTORE_SIGMASK);
492 }
493 } else if (sigmask)
494 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
495
496 return ret;
497}
498
499/*
500 * Most architectures can't handle 7-argument syscalls. So we provide a
501 * 6-argument version where the sixth argument is a pointer to a structure
502 * which has a pointer to the sigset_t itself followed by a size_t containing
503 * the sigset size.
504 */
505asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
506 fd_set __user *exp, struct timespec __user *tsp, void __user *sig)
507{
508 size_t sigsetsize = 0;
509 sigset_t __user *up = NULL;
510
511 if (sig) {
512 if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
513 || __get_user(up, (sigset_t * __user *)sig)
514 || __get_user(sigsetsize,
515 (size_t * __user)(sig+sizeof(void *))))
516 return -EFAULT;
517 }
518
519 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
520}
521#endif /* TIF_RESTORE_SIGMASK */
522
398struct poll_list { 523struct poll_list {
399 struct poll_list *next; 524 struct poll_list *next;
400 int len; 525 int len;
@@ -436,16 +561,19 @@ static void do_pollfd(unsigned int num, struct pollfd * fdpage,
436} 561}
437 562
438static int do_poll(unsigned int nfds, struct poll_list *list, 563static int do_poll(unsigned int nfds, struct poll_list *list,
439 struct poll_wqueues *wait, long timeout) 564 struct poll_wqueues *wait, s64 *timeout)
440{ 565{
441 int count = 0; 566 int count = 0;
442 poll_table* pt = &wait->pt; 567 poll_table* pt = &wait->pt;
443 568
444 if (!timeout) 569 /* Optimise the no-wait case */
570 if (!(*timeout))
445 pt = NULL; 571 pt = NULL;
446 572
447 for (;;) { 573 for (;;) {
448 struct poll_list *walk; 574 struct poll_list *walk;
575 long __timeout;
576
449 set_current_state(TASK_INTERRUPTIBLE); 577 set_current_state(TASK_INTERRUPTIBLE);
450 walk = list; 578 walk = list;
451 while(walk != NULL) { 579 while(walk != NULL) {
@@ -453,18 +581,36 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
453 walk = walk->next; 581 walk = walk->next;
454 } 582 }
455 pt = NULL; 583 pt = NULL;
456 if (count || !timeout || signal_pending(current)) 584 if (count || !*timeout || signal_pending(current))
457 break; 585 break;
458 count = wait->error; 586 count = wait->error;
459 if (count) 587 if (count)
460 break; 588 break;
461 timeout = schedule_timeout(timeout); 589
590 if (*timeout < 0) {
591 /* Wait indefinitely */
592 __timeout = MAX_SCHEDULE_TIMEOUT;
593 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
594 /*
595 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
596 * a loop
597 */
598 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
599 *timeout -= __timeout;
600 } else {
601 __timeout = *timeout;
602 *timeout = 0;
603 }
604
605 __timeout = schedule_timeout(__timeout);
606 if (*timeout >= 0)
607 *timeout += __timeout;
462 } 608 }
463 __set_current_state(TASK_RUNNING); 609 __set_current_state(TASK_RUNNING);
464 return count; 610 return count;
465} 611}
466 612
467asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout) 613int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
468{ 614{
469 struct poll_wqueues table; 615 struct poll_wqueues table;
470 int fdcount, err; 616 int fdcount, err;
@@ -482,14 +628,6 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
482 if (nfds > max_fdset && nfds > OPEN_MAX) 628 if (nfds > max_fdset && nfds > OPEN_MAX)
483 return -EINVAL; 629 return -EINVAL;
484 630
485 if (timeout) {
486 /* Careful about overflow in the intermediate values */
487 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
488 timeout = (unsigned long)(timeout*HZ+999)/1000+1;
489 else /* Negative or overflow */
490 timeout = MAX_SCHEDULE_TIMEOUT;
491 }
492
493 poll_initwait(&table); 631 poll_initwait(&table);
494 632
495 head = NULL; 633 head = NULL;
@@ -519,6 +657,7 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
519 } 657 }
520 i -= pp->len; 658 i -= pp->len;
521 } 659 }
660
522 fdcount = do_poll(nfds, head, &table, timeout); 661 fdcount = do_poll(nfds, head, &table, timeout);
523 662
524 /* OK, now copy the revents fields back to user space. */ 663 /* OK, now copy the revents fields back to user space. */
@@ -547,3 +686,98 @@ out_fds:
547 poll_freewait(&table); 686 poll_freewait(&table);
548 return err; 687 return err;
549} 688}
689
690asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
691 long timeout_msecs)
692{
693 s64 timeout_jiffies = 0;
694
695 if (timeout_msecs) {
696#if HZ > 1000
697 /* We can only overflow if HZ > 1000 */
698 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
699 timeout_jiffies = -1;
700 else
701#endif
702 timeout_jiffies = msecs_to_jiffies(timeout_msecs);
703 }
704
705 return do_sys_poll(ufds, nfds, &timeout_jiffies);
706}
707
708#ifdef TIF_RESTORE_SIGMASK
709asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
710 struct timespec __user *tsp, const sigset_t __user *sigmask,
711 size_t sigsetsize)
712{
713 sigset_t ksigmask, sigsaved;
714 struct timespec ts;
715 s64 timeout = -1;
716 int ret;
717
718 if (tsp) {
719 if (copy_from_user(&ts, tsp, sizeof(ts)))
720 return -EFAULT;
721
722 /* Cast to u64 to make GCC stop complaining */
723 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
724 timeout = -1; /* infinite */
725 else {
726 timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
727 timeout += ts.tv_sec * HZ;
728 }
729 }
730
731 if (sigmask) {
732 /* XXX: Don't preclude handling different sized sigset_t's. */
733 if (sigsetsize != sizeof(sigset_t))
734 return -EINVAL;
735 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
736 return -EFAULT;
737
738 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
739 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
740 }
741
742 ret = do_sys_poll(ufds, nfds, &timeout);
743
744 /* We can restart this syscall, usually */
745 if (ret == -EINTR) {
746 /*
747 * Don't restore the signal mask yet. Let do_signal() deliver
748 * the signal on the way back to userspace, before the signal
749 * mask is restored.
750 */
751 if (sigmask) {
752 memcpy(&current->saved_sigmask, &sigsaved,
753 sizeof(sigsaved));
754 set_thread_flag(TIF_RESTORE_SIGMASK);
755 }
756 ret = -ERESTARTNOHAND;
757 } else if (sigmask)
758 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
759
760 if (tsp && timeout >= 0) {
761 if (current->personality & STICKY_TIMEOUTS)
762 goto sticky;
763 /* Yes, we know it's actually an s64, but it's also positive. */
764 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
765 ts.tv_sec = timeout;
766 if (copy_to_user(tsp, &ts, sizeof(ts))) {
767 sticky:
768 /*
769 * If an application puts its timeval in read-only
770 * memory, we don't want the Linux-specific update to
771 * the timeval to cause a fault after the select has
772 * completed successfully. However, because we're not
773 * updating the timeval, we can't restart the system
774 * call.
775 */
776 if (ret == -ERESTARTNOHAND && timeout >= 0)
777 ret = -EINTR;
778 }
779 }
780
781 return ret;
782}
783#endif /* TIF_RESTORE_SIGMASK */
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile
index 93246b7dd6fb..6673ee82cb4c 100644
--- a/fs/smbfs/Makefile
+++ b/fs/smbfs/Makefile
@@ -13,7 +13,6 @@ smbfs-objs := proc.o dir.o cache.o sock.o inode.o file.o ioctl.o getopt.o \
13EXTRA_CFLAGS += -DSMBFS_PARANOIA 13EXTRA_CFLAGS += -DSMBFS_PARANOIA
14#EXTRA_CFLAGS += -DSMBFS_DEBUG 14#EXTRA_CFLAGS += -DSMBFS_DEBUG
15#EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE 15#EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE
16#EXTRA_CFLAGS += -DDEBUG_SMB_MALLOC
17#EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP 16#EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP
18#EXTRA_CFLAGS += -Werror 17#EXTRA_CFLAGS += -Werror
19 18
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index f3e6b81288ab..74b86d9725a6 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
66 spin_lock(&dcache_lock); 66 spin_lock(&dcache_lock);
67 next = parent->d_subdirs.next; 67 next = parent->d_subdirs.next;
68 while (next != &parent->d_subdirs) { 68 while (next != &parent->d_subdirs) {
69 dentry = list_entry(next, struct dentry, d_child); 69 dentry = list_entry(next, struct dentry, d_u.d_child);
70 dentry->d_fsdata = NULL; 70 dentry->d_fsdata = NULL;
71 smb_age_dentry(server, dentry); 71 smb_age_dentry(server, dentry);
72 next = next->next; 72 next = next->next;
@@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
100 spin_lock(&dcache_lock); 100 spin_lock(&dcache_lock);
101 next = parent->d_subdirs.next; 101 next = parent->d_subdirs.next;
102 while (next != &parent->d_subdirs) { 102 while (next != &parent->d_subdirs) {
103 dent = list_entry(next, struct dentry, d_child); 103 dent = list_entry(next, struct dentry, d_u.d_child);
104 if ((unsigned long)dent->d_fsdata == fpos) { 104 if ((unsigned long)dent->d_fsdata == fpos) {
105 if (dent->d_inode) 105 if (dent->d_inode)
106 dget_locked(dent); 106 dget_locked(dent);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index b4fcfa8b55a1..7042e62726a4 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -209,8 +209,8 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset,
209{ 209{
210 struct dentry *dentry = file->f_dentry; 210 struct dentry *dentry = file->f_dentry;
211 211
212 DEBUG1("(%s/%s %d@%ld)\n", DENTRY_PATH(dentry), 212 DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
213 count, (page->index << PAGE_CACHE_SHIFT)+offset); 213 ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
214 214
215 return smb_writepage_sync(dentry->d_inode, page, offset, count); 215 return smb_writepage_sync(dentry->d_inode, page, offset, count);
216} 216}
@@ -374,8 +374,7 @@ smb_file_release(struct inode *inode, struct file * file)
374 /* We must flush any dirty pages now as we won't be able to 374 /* We must flush any dirty pages now as we won't be able to
375 write anything after close. mmap can trigger this. 375 write anything after close. mmap can trigger this.
376 "openers" should perhaps include mmap'ers ... */ 376 "openers" should perhaps include mmap'ers ... */
377 filemap_fdatawrite(inode->i_mapping); 377 filemap_write_and_wait(inode->i_mapping);
378 filemap_fdatawait(inode->i_mapping);
379 smb_close(inode); 378 smb_close(inode);
380 } 379 }
381 unlock_kernel(); 380 unlock_kernel();
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 10b994428fef..02e3e82d465c 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -487,11 +487,11 @@ smb_put_super(struct super_block *sb)
487 if (server->conn_pid) 487 if (server->conn_pid)
488 kill_proc(server->conn_pid, SIGTERM, 1); 488 kill_proc(server->conn_pid, SIGTERM, 1);
489 489
490 smb_kfree(server->ops); 490 kfree(server->ops);
491 smb_unload_nls(server); 491 smb_unload_nls(server);
492 sb->s_fs_info = NULL; 492 sb->s_fs_info = NULL;
493 smb_unlock_server(server); 493 smb_unlock_server(server);
494 smb_kfree(server); 494 kfree(server);
495} 495}
496 496
497static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) 497static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
@@ -519,11 +519,10 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
519 sb->s_op = &smb_sops; 519 sb->s_op = &smb_sops;
520 sb->s_time_gran = 100; 520 sb->s_time_gran = 100;
521 521
522 server = smb_kmalloc(sizeof(struct smb_sb_info), GFP_KERNEL); 522 server = kzalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
523 if (!server) 523 if (!server)
524 goto out_no_server; 524 goto out_no_server;
525 sb->s_fs_info = server; 525 sb->s_fs_info = server;
526 memset(server, 0, sizeof(struct smb_sb_info));
527 526
528 server->super_block = sb; 527 server->super_block = sb;
529 server->mnt = NULL; 528 server->mnt = NULL;
@@ -542,8 +541,8 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
542 /* FIXME: move these to the smb_sb_info struct */ 541 /* FIXME: move these to the smb_sb_info struct */
543 VERBOSE("alloc chunk = %d\n", sizeof(struct smb_ops) + 542 VERBOSE("alloc chunk = %d\n", sizeof(struct smb_ops) +
544 sizeof(struct smb_mount_data_kernel)); 543 sizeof(struct smb_mount_data_kernel));
545 mem = smb_kmalloc(sizeof(struct smb_ops) + 544 mem = kmalloc(sizeof(struct smb_ops) +
546 sizeof(struct smb_mount_data_kernel), GFP_KERNEL); 545 sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
547 if (!mem) 546 if (!mem)
548 goto out_no_mem; 547 goto out_no_mem;
549 548
@@ -621,12 +620,12 @@ out_no_root:
621out_no_smbiod: 620out_no_smbiod:
622 smb_unload_nls(server); 621 smb_unload_nls(server);
623out_bad_option: 622out_bad_option:
624 smb_kfree(mem); 623 kfree(mem);
625out_no_mem: 624out_no_mem:
626 if (!server->mnt) 625 if (!server->mnt)
627 printk(KERN_ERR "smb_fill_super: allocation failure\n"); 626 printk(KERN_ERR "smb_fill_super: allocation failure\n");
628 sb->s_fs_info = NULL; 627 sb->s_fs_info = NULL;
629 smb_kfree(server); 628 kfree(server);
630 goto out_fail; 629 goto out_fail;
631out_wrong_data: 630out_wrong_data:
632 printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver); 631 printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver);
@@ -697,8 +696,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
697 DENTRY_PATH(dentry), 696 DENTRY_PATH(dentry),
698 (long) inode->i_size, (long) attr->ia_size); 697 (long) inode->i_size, (long) attr->ia_size);
699 698
700 filemap_fdatawrite(inode->i_mapping); 699 filemap_write_and_wait(inode->i_mapping);
701 filemap_fdatawait(inode->i_mapping);
702 700
703 error = smb_open(dentry, O_WRONLY); 701 error = smb_open(dentry, O_WRONLY);
704 if (error) 702 if (error)
@@ -783,12 +781,6 @@ out:
783 return error; 781 return error;
784} 782}
785 783
786#ifdef DEBUG_SMB_MALLOC
787int smb_malloced;
788int smb_current_kmalloced;
789int smb_current_vmalloced;
790#endif
791
792static struct super_block *smb_get_sb(struct file_system_type *fs_type, 784static struct super_block *smb_get_sb(struct file_system_type *fs_type,
793 int flags, const char *dev_name, void *data) 785 int flags, const char *dev_name, void *data)
794{ 786{
@@ -808,12 +800,6 @@ static int __init init_smb_fs(void)
808 int err; 800 int err;
809 DEBUG1("registering ...\n"); 801 DEBUG1("registering ...\n");
810 802
811#ifdef DEBUG_SMB_MALLOC
812 smb_malloced = 0;
813 smb_current_kmalloced = 0;
814 smb_current_vmalloced = 0;
815#endif
816
817 err = init_inodecache(); 803 err = init_inodecache();
818 if (err) 804 if (err)
819 goto out_inode; 805 goto out_inode;
@@ -838,11 +824,6 @@ static void __exit exit_smb_fs(void)
838 unregister_filesystem(&smb_fs_type); 824 unregister_filesystem(&smb_fs_type);
839 smb_destroy_request_cache(); 825 smb_destroy_request_cache();
840 destroy_inodecache(); 826 destroy_inodecache();
841#ifdef DEBUG_SMB_MALLOC
842 printk(KERN_DEBUG "smb_malloced: %d\n", smb_malloced);
843 printk(KERN_DEBUG "smb_current_kmalloced: %d\n",smb_current_kmalloced);
844 printk(KERN_DEBUG "smb_current_vmalloced: %d\n",smb_current_vmalloced);
845#endif
846} 827}
847 828
848module_init(init_smb_fs) 829module_init(init_smb_fs)
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 38ab558835c4..b1b878b81730 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -8,6 +8,7 @@
8 */ 8 */
9 9
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/capability.h>
11#include <linux/errno.h> 12#include <linux/errno.h>
12#include <linux/slab.h> 13#include <linux/slab.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
@@ -3113,7 +3114,7 @@ smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
3113 LSET(data, 32, SMB_TIME_NO_CHANGE); 3114 LSET(data, 32, SMB_TIME_NO_CHANGE);
3114 LSET(data, 40, SMB_UID_NO_CHANGE); 3115 LSET(data, 40, SMB_UID_NO_CHANGE);
3115 LSET(data, 48, SMB_GID_NO_CHANGE); 3116 LSET(data, 48, SMB_GID_NO_CHANGE);
3116 LSET(data, 56, smb_filetype_from_mode(attr->ia_mode)); 3117 DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
3117 LSET(data, 60, major); 3118 LSET(data, 60, major);
3118 LSET(data, 68, minor); 3119 LSET(data, 68, minor);
3119 LSET(data, 76, 0); 3120 LSET(data, 76, 0);
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index a0f296d9928a..c71c375863cc 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -68,7 +68,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
68 goto out; 68 goto out;
69 69
70 if (bufsize > 0) { 70 if (bufsize > 0) {
71 buf = smb_kmalloc(bufsize, GFP_NOFS); 71 buf = kmalloc(bufsize, GFP_NOFS);
72 if (!buf) { 72 if (!buf) {
73 kmem_cache_free(req_cachep, req); 73 kmem_cache_free(req_cachep, req);
74 return NULL; 74 return NULL;
@@ -124,9 +124,8 @@ static void smb_free_request(struct smb_request *req)
124{ 124{
125 atomic_dec(&req->rq_server->nr_requests); 125 atomic_dec(&req->rq_server->nr_requests);
126 if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC)) 126 if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC))
127 smb_kfree(req->rq_buffer); 127 kfree(req->rq_buffer);
128 if (req->rq_trans2buffer) 128 kfree(req->rq_trans2buffer);
129 smb_kfree(req->rq_trans2buffer);
130 kmem_cache_free(req_cachep, req); 129 kmem_cache_free(req_cachep, req);
131} 130}
132 131
@@ -183,8 +182,7 @@ static int smb_setup_request(struct smb_request *req)
183 req->rq_err = 0; 182 req->rq_err = 0;
184 req->rq_errno = 0; 183 req->rq_errno = 0;
185 req->rq_fragment = 0; 184 req->rq_fragment = 0;
186 if (req->rq_trans2buffer) 185 kfree(req->rq_trans2buffer);
187 smb_kfree(req->rq_trans2buffer);
188 186
189 return 0; 187 return 0;
190} 188}
@@ -647,10 +645,9 @@ static int smb_recv_trans2(struct smb_sb_info *server, struct smb_request *req)
647 goto out_too_long; 645 goto out_too_long;
648 646
649 req->rq_trans2bufsize = buf_len; 647 req->rq_trans2bufsize = buf_len;
650 req->rq_trans2buffer = smb_kmalloc(buf_len, GFP_NOFS); 648 req->rq_trans2buffer = kzalloc(buf_len, GFP_NOFS);
651 if (!req->rq_trans2buffer) 649 if (!req->rq_trans2buffer)
652 goto out_no_mem; 650 goto out_no_mem;
653 memset(req->rq_trans2buffer, 0, buf_len);
654 651
655 req->rq_parm = req->rq_trans2buffer; 652 req->rq_parm = req->rq_trans2buffer;
656 req->rq_data = req->rq_trans2buffer + parm_tot; 653 req->rq_data = req->rq_trans2buffer + parm_tot;
diff --git a/fs/stat.c b/fs/stat.c
index b8a0e5110ab2..24211b030f39 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -63,12 +63,12 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
63 63
64EXPORT_SYMBOL(vfs_getattr); 64EXPORT_SYMBOL(vfs_getattr);
65 65
66int vfs_stat(char __user *name, struct kstat *stat) 66int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
67{ 67{
68 struct nameidata nd; 68 struct nameidata nd;
69 int error; 69 int error;
70 70
71 error = user_path_walk(name, &nd); 71 error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
72 if (!error) { 72 if (!error) {
73 error = vfs_getattr(nd.mnt, nd.dentry, stat); 73 error = vfs_getattr(nd.mnt, nd.dentry, stat);
74 path_release(&nd); 74 path_release(&nd);
@@ -76,14 +76,19 @@ int vfs_stat(char __user *name, struct kstat *stat)
76 return error; 76 return error;
77} 77}
78 78
79int vfs_stat(char __user *name, struct kstat *stat)
80{
81 return vfs_stat_fd(AT_FDCWD, name, stat);
82}
83
79EXPORT_SYMBOL(vfs_stat); 84EXPORT_SYMBOL(vfs_stat);
80 85
81int vfs_lstat(char __user *name, struct kstat *stat) 86int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
82{ 87{
83 struct nameidata nd; 88 struct nameidata nd;
84 int error; 89 int error;
85 90
86 error = user_path_walk_link(name, &nd); 91 error = __user_walk_fd(dfd, name, 0, &nd);
87 if (!error) { 92 if (!error) {
88 error = vfs_getattr(nd.mnt, nd.dentry, stat); 93 error = vfs_getattr(nd.mnt, nd.dentry, stat);
89 path_release(&nd); 94 path_release(&nd);
@@ -91,6 +96,11 @@ int vfs_lstat(char __user *name, struct kstat *stat)
91 return error; 96 return error;
92} 97}
93 98
99int vfs_lstat(char __user *name, struct kstat *stat)
100{
101 return vfs_lstat_fd(AT_FDCWD, name, stat);
102}
103
94EXPORT_SYMBOL(vfs_lstat); 104EXPORT_SYMBOL(vfs_lstat);
95 105
96int vfs_fstat(unsigned int fd, struct kstat *stat) 106int vfs_fstat(unsigned int fd, struct kstat *stat)
@@ -151,7 +161,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
151asmlinkage long sys_stat(char __user * filename, struct __old_kernel_stat __user * statbuf) 161asmlinkage long sys_stat(char __user * filename, struct __old_kernel_stat __user * statbuf)
152{ 162{
153 struct kstat stat; 163 struct kstat stat;
154 int error = vfs_stat(filename, &stat); 164 int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
155 165
156 if (!error) 166 if (!error)
157 error = cp_old_stat(&stat, statbuf); 167 error = cp_old_stat(&stat, statbuf);
@@ -161,7 +171,7 @@ asmlinkage long sys_stat(char __user * filename, struct __old_kernel_stat __user
161asmlinkage long sys_lstat(char __user * filename, struct __old_kernel_stat __user * statbuf) 171asmlinkage long sys_lstat(char __user * filename, struct __old_kernel_stat __user * statbuf)
162{ 172{
163 struct kstat stat; 173 struct kstat stat;
164 int error = vfs_lstat(filename, &stat); 174 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
165 175
166 if (!error) 176 if (!error)
167 error = cp_old_stat(&stat, statbuf); 177 error = cp_old_stat(&stat, statbuf);
@@ -229,27 +239,50 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
229 return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; 239 return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
230} 240}
231 241
232asmlinkage long sys_newstat(char __user * filename, struct stat __user * statbuf) 242asmlinkage long sys_newstat(char __user *filename, struct stat __user *statbuf)
233{ 243{
234 struct kstat stat; 244 struct kstat stat;
235 int error = vfs_stat(filename, &stat); 245 int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
236 246
237 if (!error) 247 if (!error)
238 error = cp_new_stat(&stat, statbuf); 248 error = cp_new_stat(&stat, statbuf);
239 249
240 return error; 250 return error;
241} 251}
242asmlinkage long sys_newlstat(char __user * filename, struct stat __user * statbuf) 252
253asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
243{ 254{
244 struct kstat stat; 255 struct kstat stat;
245 int error = vfs_lstat(filename, &stat); 256 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
246 257
247 if (!error) 258 if (!error)
248 error = cp_new_stat(&stat, statbuf); 259 error = cp_new_stat(&stat, statbuf);
249 260
250 return error; 261 return error;
251} 262}
252asmlinkage long sys_newfstat(unsigned int fd, struct stat __user * statbuf) 263
264asmlinkage long sys_newfstatat(int dfd, char __user *filename,
265 struct stat __user *statbuf, int flag)
266{
267 struct kstat stat;
268 int error = -EINVAL;
269
270 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
271 goto out;
272
273 if (flag & AT_SYMLINK_NOFOLLOW)
274 error = vfs_lstat_fd(dfd, filename, &stat);
275 else
276 error = vfs_stat_fd(dfd, filename, &stat);
277
278 if (!error)
279 error = cp_new_stat(&stat, statbuf);
280
281out:
282 return error;
283}
284
285asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
253{ 286{
254 struct kstat stat; 287 struct kstat stat;
255 int error = vfs_fstat(fd, &stat); 288 int error = vfs_fstat(fd, &stat);
@@ -260,7 +293,8 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user * statbuf)
260 return error; 293 return error;
261} 294}
262 295
263asmlinkage long sys_readlink(const char __user * path, char __user * buf, int bufsiz) 296asmlinkage long sys_readlinkat(int dfd, const char __user *path,
297 char __user *buf, int bufsiz)
264{ 298{
265 struct nameidata nd; 299 struct nameidata nd;
266 int error; 300 int error;
@@ -268,7 +302,7 @@ asmlinkage long sys_readlink(const char __user * path, char __user * buf, int bu
268 if (bufsiz <= 0) 302 if (bufsiz <= 0)
269 return -EINVAL; 303 return -EINVAL;
270 304
271 error = user_path_walk_link(path, &nd); 305 error = __user_walk_fd(dfd, path, 0, &nd);
272 if (!error) { 306 if (!error) {
273 struct inode * inode = nd.dentry->d_inode; 307 struct inode * inode = nd.dentry->d_inode;
274 308
@@ -285,6 +319,12 @@ asmlinkage long sys_readlink(const char __user * path, char __user * buf, int bu
285 return error; 319 return error;
286} 320}
287 321
322asmlinkage long sys_readlink(const char __user *path, char __user *buf,
323 int bufsiz)
324{
325 return sys_readlinkat(AT_FDCWD, path, buf, bufsiz);
326}
327
288 328
289/* ---------- LFS-64 ----------- */ 329/* ---------- LFS-64 ----------- */
290#ifdef __ARCH_WANT_STAT64 330#ifdef __ARCH_WANT_STAT64
diff --git a/fs/super.c b/fs/super.c
index 5a347a4f673a..c177b92419c5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -72,7 +72,7 @@ static struct super_block *alloc_super(void)
72 INIT_HLIST_HEAD(&s->s_anon); 72 INIT_HLIST_HEAD(&s->s_anon);
73 INIT_LIST_HEAD(&s->s_inodes); 73 INIT_LIST_HEAD(&s->s_inodes);
74 init_rwsem(&s->s_umount); 74 init_rwsem(&s->s_umount);
75 sema_init(&s->s_lock, 1); 75 mutex_init(&s->s_lock);
76 down_write(&s->s_umount); 76 down_write(&s->s_umount);
77 s->s_count = S_BIAS; 77 s->s_count = S_BIAS;
78 atomic_set(&s->s_active, 1); 78 atomic_set(&s->s_active, 1);
@@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
700 700
701 s->s_flags = flags; 701 s->s_flags = flags;
702 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 702 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
703 s->s_old_blocksize = block_size(bdev); 703 sb_set_blocksize(s, block_size(bdev));
704 sb_set_blocksize(s, s->s_old_blocksize);
705 error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); 704 error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
706 if (error) { 705 if (error) {
707 up_write(&s->s_umount); 706 up_write(&s->s_umount);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index d36780382176..49bd219275db 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -99,7 +99,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
99 int error; 99 int error;
100 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 100 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
101 101
102 down(&p->d_inode->i_sem); 102 mutex_lock(&p->d_inode->i_mutex);
103 *d = lookup_one_len(n, p, strlen(n)); 103 *d = lookup_one_len(n, p, strlen(n));
104 if (!IS_ERR(*d)) { 104 if (!IS_ERR(*d)) {
105 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); 105 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
@@ -122,7 +122,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
122 dput(*d); 122 dput(*d);
123 } else 123 } else
124 error = PTR_ERR(*d); 124 error = PTR_ERR(*d);
125 up(&p->d_inode->i_sem); 125 mutex_unlock(&p->d_inode->i_mutex);
126 return error; 126 return error;
127} 127}
128 128
@@ -246,7 +246,7 @@ static void remove_dir(struct dentry * d)
246 struct dentry * parent = dget(d->d_parent); 246 struct dentry * parent = dget(d->d_parent);
247 struct sysfs_dirent * sd; 247 struct sysfs_dirent * sd;
248 248
249 down(&parent->d_inode->i_sem); 249 mutex_lock(&parent->d_inode->i_mutex);
250 d_delete(d); 250 d_delete(d);
251 sd = d->d_fsdata; 251 sd = d->d_fsdata;
252 list_del_init(&sd->s_sibling); 252 list_del_init(&sd->s_sibling);
@@ -257,7 +257,7 @@ static void remove_dir(struct dentry * d)
257 pr_debug(" o %s removing done (%d)\n",d->d_name.name, 257 pr_debug(" o %s removing done (%d)\n",d->d_name.name,
258 atomic_read(&d->d_count)); 258 atomic_read(&d->d_count));
259 259
260 up(&parent->d_inode->i_sem); 260 mutex_unlock(&parent->d_inode->i_mutex);
261 dput(parent); 261 dput(parent);
262} 262}
263 263
@@ -286,7 +286,7 @@ void sysfs_remove_dir(struct kobject * kobj)
286 return; 286 return;
287 287
288 pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); 288 pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
289 down(&dentry->d_inode->i_sem); 289 mutex_lock(&dentry->d_inode->i_mutex);
290 parent_sd = dentry->d_fsdata; 290 parent_sd = dentry->d_fsdata;
291 list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { 291 list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
292 if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) 292 if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
@@ -295,7 +295,7 @@ void sysfs_remove_dir(struct kobject * kobj)
295 sysfs_drop_dentry(sd, dentry); 295 sysfs_drop_dentry(sd, dentry);
296 sysfs_put(sd); 296 sysfs_put(sd);
297 } 297 }
298 up(&dentry->d_inode->i_sem); 298 mutex_unlock(&dentry->d_inode->i_mutex);
299 299
300 remove_dir(dentry); 300 remove_dir(dentry);
301 /** 301 /**
@@ -318,7 +318,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
318 down_write(&sysfs_rename_sem); 318 down_write(&sysfs_rename_sem);
319 parent = kobj->parent->dentry; 319 parent = kobj->parent->dentry;
320 320
321 down(&parent->d_inode->i_sem); 321 mutex_lock(&parent->d_inode->i_mutex);
322 322
323 new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); 323 new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
324 if (!IS_ERR(new_dentry)) { 324 if (!IS_ERR(new_dentry)) {
@@ -334,7 +334,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
334 error = -EEXIST; 334 error = -EEXIST;
335 dput(new_dentry); 335 dput(new_dentry);
336 } 336 }
337 up(&parent->d_inode->i_sem); 337 mutex_unlock(&parent->d_inode->i_mutex);
338 up_write(&sysfs_rename_sem); 338 up_write(&sysfs_rename_sem);
339 339
340 return error; 340 return error;
@@ -345,9 +345,9 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
345 struct dentry * dentry = file->f_dentry; 345 struct dentry * dentry = file->f_dentry;
346 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 346 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
347 347
348 down(&dentry->d_inode->i_sem); 348 mutex_lock(&dentry->d_inode->i_mutex);
349 file->private_data = sysfs_new_dirent(parent_sd, NULL); 349 file->private_data = sysfs_new_dirent(parent_sd, NULL);
350 up(&dentry->d_inode->i_sem); 350 mutex_unlock(&dentry->d_inode->i_mutex);
351 351
352 return file->private_data ? 0 : -ENOMEM; 352 return file->private_data ? 0 : -ENOMEM;
353 353
@@ -358,9 +358,9 @@ static int sysfs_dir_close(struct inode *inode, struct file *file)
358 struct dentry * dentry = file->f_dentry; 358 struct dentry * dentry = file->f_dentry;
359 struct sysfs_dirent * cursor = file->private_data; 359 struct sysfs_dirent * cursor = file->private_data;
360 360
361 down(&dentry->d_inode->i_sem); 361 mutex_lock(&dentry->d_inode->i_mutex);
362 list_del_init(&cursor->s_sibling); 362 list_del_init(&cursor->s_sibling);
363 up(&dentry->d_inode->i_sem); 363 mutex_unlock(&dentry->d_inode->i_mutex);
364 364
365 release_sysfs_dirent(cursor); 365 release_sysfs_dirent(cursor);
366 366
@@ -436,7 +436,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
436{ 436{
437 struct dentry * dentry = file->f_dentry; 437 struct dentry * dentry = file->f_dentry;
438 438
439 down(&dentry->d_inode->i_sem); 439 mutex_lock(&dentry->d_inode->i_mutex);
440 switch (origin) { 440 switch (origin) {
441 case 1: 441 case 1:
442 offset += file->f_pos; 442 offset += file->f_pos;
@@ -444,7 +444,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
444 if (offset >= 0) 444 if (offset >= 0)
445 break; 445 break;
446 default: 446 default:
447 up(&file->f_dentry->d_inode->i_sem); 447 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
448 return -EINVAL; 448 return -EINVAL;
449 } 449 }
450 if (offset != file->f_pos) { 450 if (offset != file->f_pos) {
@@ -468,7 +468,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
468 list_add_tail(&cursor->s_sibling, p); 468 list_add_tail(&cursor->s_sibling, p);
469 } 469 }
470 } 470 }
471 up(&dentry->d_inode->i_sem); 471 mutex_unlock(&dentry->d_inode->i_mutex);
472 return offset; 472 return offset;
473} 473}
474 474
@@ -483,4 +483,3 @@ struct file_operations sysfs_dir_operations = {
483EXPORT_SYMBOL_GPL(sysfs_create_dir); 483EXPORT_SYMBOL_GPL(sysfs_create_dir);
484EXPORT_SYMBOL_GPL(sysfs_remove_dir); 484EXPORT_SYMBOL_GPL(sysfs_remove_dir);
485EXPORT_SYMBOL_GPL(sysfs_rename_dir); 485EXPORT_SYMBOL_GPL(sysfs_rename_dir);
486
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 4013d7905e84..d0e3d8495165 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -364,9 +364,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
364 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; 364 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
365 int error = 0; 365 int error = 0;
366 366
367 down(&dir->d_inode->i_sem); 367 mutex_lock(&dir->d_inode->i_mutex);
368 error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); 368 error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
369 up(&dir->d_inode->i_sem); 369 mutex_unlock(&dir->d_inode->i_mutex);
370 370
371 return error; 371 return error;
372} 372}
@@ -398,7 +398,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
398 struct dentry * victim; 398 struct dentry * victim;
399 int res = -ENOENT; 399 int res = -ENOENT;
400 400
401 down(&dir->d_inode->i_sem); 401 mutex_lock(&dir->d_inode->i_mutex);
402 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 402 victim = lookup_one_len(attr->name, dir, strlen(attr->name));
403 if (!IS_ERR(victim)) { 403 if (!IS_ERR(victim)) {
404 /* make sure dentry is really there */ 404 /* make sure dentry is really there */
@@ -420,7 +420,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
420 */ 420 */
421 dput(victim); 421 dput(victim);
422 } 422 }
423 up(&dir->d_inode->i_sem); 423 mutex_unlock(&dir->d_inode->i_mutex);
424 424
425 return res; 425 return res;
426} 426}
@@ -441,22 +441,22 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
441 struct iattr newattrs; 441 struct iattr newattrs;
442 int res = -ENOENT; 442 int res = -ENOENT;
443 443
444 down(&dir->d_inode->i_sem); 444 mutex_lock(&dir->d_inode->i_mutex);
445 victim = lookup_one_len(attr->name, dir, strlen(attr->name)); 445 victim = lookup_one_len(attr->name, dir, strlen(attr->name));
446 if (!IS_ERR(victim)) { 446 if (!IS_ERR(victim)) {
447 if (victim->d_inode && 447 if (victim->d_inode &&
448 (victim->d_parent->d_inode == dir->d_inode)) { 448 (victim->d_parent->d_inode == dir->d_inode)) {
449 inode = victim->d_inode; 449 inode = victim->d_inode;
450 down(&inode->i_sem); 450 mutex_lock(&inode->i_mutex);
451 newattrs.ia_mode = (mode & S_IALLUGO) | 451 newattrs.ia_mode = (mode & S_IALLUGO) |
452 (inode->i_mode & ~S_IALLUGO); 452 (inode->i_mode & ~S_IALLUGO);
453 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 453 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
454 res = notify_change(victim, &newattrs); 454 res = notify_change(victim, &newattrs);
455 up(&inode->i_sem); 455 mutex_unlock(&inode->i_mutex);
456 } 456 }
457 dput(victim); 457 dput(victim);
458 } 458 }
459 up(&dir->d_inode->i_sem); 459 mutex_unlock(&dir->d_inode->i_mutex);
460 460
461 return res; 461 return res;
462} 462}
@@ -480,4 +480,3 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
480EXPORT_SYMBOL_GPL(sysfs_create_file); 480EXPORT_SYMBOL_GPL(sysfs_create_file);
481EXPORT_SYMBOL_GPL(sysfs_remove_file); 481EXPORT_SYMBOL_GPL(sysfs_remove_file);
482EXPORT_SYMBOL_GPL(sysfs_update_file); 482EXPORT_SYMBOL_GPL(sysfs_update_file);
483
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 970a33f03299..689f7bcfaf30 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -11,6 +11,7 @@
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/namei.h> 12#include <linux/namei.h>
13#include <linux/backing-dev.h> 13#include <linux/backing-dev.h>
14#include <linux/capability.h>
14#include "sysfs.h" 15#include "sysfs.h"
15 16
16extern struct super_block * sysfs_sb; 17extern struct super_block * sysfs_sb;
@@ -201,7 +202,7 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
201 202
202/* 203/*
203 * Unhashes the dentry corresponding to given sysfs_dirent 204 * Unhashes the dentry corresponding to given sysfs_dirent
204 * Called with parent inode's i_sem held. 205 * Called with parent inode's i_mutex held.
205 */ 206 */
206void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) 207void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
207{ 208{
@@ -232,7 +233,7 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
232 /* no inode means this hasn't been made visible yet */ 233 /* no inode means this hasn't been made visible yet */
233 return; 234 return;
234 235
235 down(&dir->d_inode->i_sem); 236 mutex_lock(&dir->d_inode->i_mutex);
236 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 237 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
237 if (!sd->s_element) 238 if (!sd->s_element)
238 continue; 239 continue;
@@ -243,7 +244,5 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
243 break; 244 break;
244 } 245 }
245 } 246 }
246 up(&dir->d_inode->i_sem); 247 mutex_unlock(&dir->d_inode->i_mutex);
247} 248}
248
249
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index de402fa915f2..e38d6338a20d 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -86,9 +86,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
86 86
87 BUG_ON(!kobj || !kobj->dentry || !name); 87 BUG_ON(!kobj || !kobj->dentry || !name);
88 88
89 down(&dentry->d_inode->i_sem); 89 mutex_lock(&dentry->d_inode->i_mutex);
90 error = sysfs_add_link(dentry, name, target); 90 error = sysfs_add_link(dentry, name, target);
91 up(&dentry->d_inode->i_sem); 91 mutex_unlock(&dentry->d_inode->i_mutex);
92 return error; 92 return error;
93} 93}
94 94
@@ -177,4 +177,3 @@ struct inode_operations sysfs_symlink_inode_operations = {
177 177
178EXPORT_SYMBOL_GPL(sysfs_create_link); 178EXPORT_SYMBOL_GPL(sysfs_create_link);
179EXPORT_SYMBOL_GPL(sysfs_remove_link); 179EXPORT_SYMBOL_GPL(sysfs_remove_link);
180
diff --git a/fs/sysv/ChangeLog b/fs/sysv/ChangeLog
index 18e3487debdb..f403f8b91b80 100644
--- a/fs/sysv/ChangeLog
+++ b/fs/sysv/ChangeLog
@@ -54,7 +54,7 @@ Fri Jan 4 2002 Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk>
54 (sysv_read_super): Likewise. 54 (sysv_read_super): Likewise.
55 (v7_read_super): Likewise. 55 (v7_read_super): Likewise.
56 56
57Sun Dec 30 2001 Manfred Spraul <manfreds@colorfullife.com> 57Sun Dec 30 2001 Manfred Spraul <manfred@colorfullife.com>
58 58
59 * dir.c (dir_commit_chunk): Do not set dir->i_version. 59 * dir.c (dir_commit_chunk): Do not set dir->i_version.
60 (sysv_readdir): Likewise. 60 (sysv_readdir): Likewise.
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 69a085abad6f..cce8b05cba5a 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -103,7 +103,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
103 offset = (char *)de - kaddr; 103 offset = (char *)de - kaddr;
104 104
105 over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN), 105 over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN),
106 (n<<PAGE_CACHE_SHIFT) | offset, 106 ((loff_t)n<<PAGE_CACHE_SHIFT) | offset,
107 fs16_to_cpu(SYSV_SB(sb), de->inode), 107 fs16_to_cpu(SYSV_SB(sb), de->inode),
108 DT_UNKNOWN); 108 DT_UNKNOWN);
109 if (over) { 109 if (over) {
@@ -115,7 +115,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
115 } 115 }
116 116
117done: 117done:
118 filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; 118 filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset;
119 unlock_kernel(); 119 unlock_kernel();
120 return 0; 120 return 0;
121} 121}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 6598a5037ac8..4fae57d9d115 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -41,7 +41,7 @@
41#define uint(x) xuint(x) 41#define uint(x) xuint(x)
42#define xuint(x) __le ## x 42#define xuint(x) __le ## x
43 43
44extern inline int find_next_one_bit (void * addr, int size, int offset) 44static inline int find_next_one_bit (void * addr, int size, int offset)
45{ 45{
46 uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG); 46 uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG);
47 int result = offset & ~(BITS_PER_LONG-1); 47 int result = offset & ~(BITS_PER_LONG-1);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 8a388289040d..a6f2acc1f15c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -31,6 +31,7 @@
31#include <asm/uaccess.h> 31#include <asm/uaccess.h>
32#include <linux/kernel.h> 32#include <linux/kernel.h>
33#include <linux/string.h> /* memset */ 33#include <linux/string.h> /* memset */
34#include <linux/capability.h>
34#include <linux/errno.h> 35#include <linux/errno.h>
35#include <linux/smp_lock.h> 36#include <linux/smp_lock.h>
36#include <linux/pagemap.h> 37#include <linux/pagemap.h>
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4014f17d382e..395e582ee542 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1957,11 +1957,6 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t
1957 printk(KERN_ERR "udf: inode_bmap: block < 0\n"); 1957 printk(KERN_ERR "udf: inode_bmap: block < 0\n");
1958 return -1; 1958 return -1;
1959 } 1959 }
1960 if (!inode)
1961 {
1962 printk(KERN_ERR "udf: inode_bmap: NULL inode\n");
1963 return -1;
1964 }
1965 1960
1966 *extoffset = 0; 1961 *extoffset = 0;
1967 *elen = 0; 1962 *elen = 0;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index faf1512173eb..3ada9dcf55b8 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -13,6 +13,7 @@
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/quotaops.h> 14#include <linux/quotaops.h>
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/capability.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
17#include <linux/bitops.h> 18#include <linux/bitops.h>
18#include <asm/byteorder.h> 19#include <asm/byteorder.h>
@@ -48,7 +49,7 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
48 49
49 sb = inode->i_sb; 50 sb = inode->i_sb;
50 uspi = UFS_SB(sb)->s_uspi; 51 uspi = UFS_SB(sb)->s_uspi;
51 usb1 = ubh_get_usb_first(USPI_UBH); 52 usb1 = ubh_get_usb_first(uspi);
52 53
53 UFSD(("ENTER, fragment %u, count %u\n", fragment, count)) 54 UFSD(("ENTER, fragment %u, count %u\n", fragment, count))
54 55
@@ -80,8 +81,9 @@ void ufs_free_fragments (struct inode * inode, unsigned fragment, unsigned count
80 for (i = bit; i < end_bit; i++) { 81 for (i = bit; i < end_bit; i++) {
81 if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, i)) 82 if (ubh_isclr (UCPI_UBH, ucpi->c_freeoff, i))
82 ubh_setbit (UCPI_UBH, ucpi->c_freeoff, i); 83 ubh_setbit (UCPI_UBH, ucpi->c_freeoff, i);
83 else ufs_error (sb, "ufs_free_fragments", 84 else
84 "bit already cleared for fragment %u", i); 85 ufs_error (sb, "ufs_free_fragments",
86 "bit already cleared for fragment %u", i);
85 } 87 }
86 88
87 DQUOT_FREE_BLOCK (inode, count); 89 DQUOT_FREE_BLOCK (inode, count);
@@ -142,7 +144,7 @@ void ufs_free_blocks (struct inode * inode, unsigned fragment, unsigned count) {
142 144
143 sb = inode->i_sb; 145 sb = inode->i_sb;
144 uspi = UFS_SB(sb)->s_uspi; 146 uspi = UFS_SB(sb)->s_uspi;
145 usb1 = ubh_get_usb_first(USPI_UBH); 147 usb1 = ubh_get_usb_first(uspi);
146 148
147 UFSD(("ENTER, fragment %u, count %u\n", fragment, count)) 149 UFSD(("ENTER, fragment %u, count %u\n", fragment, count))
148 150
@@ -246,7 +248,7 @@ unsigned ufs_new_fragments (struct inode * inode, __fs32 * p, unsigned fragment,
246 248
247 sb = inode->i_sb; 249 sb = inode->i_sb;
248 uspi = UFS_SB(sb)->s_uspi; 250 uspi = UFS_SB(sb)->s_uspi;
249 usb1 = ubh_get_usb_first(USPI_UBH); 251 usb1 = ubh_get_usb_first(uspi);
250 *err = -ENOSPC; 252 *err = -ENOSPC;
251 253
252 lock_super (sb); 254 lock_super (sb);
@@ -406,7 +408,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
406 408
407 sb = inode->i_sb; 409 sb = inode->i_sb;
408 uspi = UFS_SB(sb)->s_uspi; 410 uspi = UFS_SB(sb)->s_uspi;
409 usb1 = ubh_get_usb_first (USPI_UBH); 411 usb1 = ubh_get_usb_first (uspi);
410 count = newcount - oldcount; 412 count = newcount - oldcount;
411 413
412 cgno = ufs_dtog(fragment); 414 cgno = ufs_dtog(fragment);
@@ -489,7 +491,7 @@ static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno,
489 491
490 sb = inode->i_sb; 492 sb = inode->i_sb;
491 uspi = UFS_SB(sb)->s_uspi; 493 uspi = UFS_SB(sb)->s_uspi;
492 usb1 = ubh_get_usb_first(USPI_UBH); 494 usb1 = ubh_get_usb_first(uspi);
493 oldcg = cgno; 495 oldcg = cgno;
494 496
495 /* 497 /*
@@ -605,7 +607,7 @@ static unsigned ufs_alloccg_block (struct inode * inode,
605 607
606 sb = inode->i_sb; 608 sb = inode->i_sb;
607 uspi = UFS_SB(sb)->s_uspi; 609 uspi = UFS_SB(sb)->s_uspi;
608 usb1 = ubh_get_usb_first(USPI_UBH); 610 usb1 = ubh_get_usb_first(uspi);
609 ucg = ubh_get_ucg(UCPI_UBH); 611 ucg = ubh_get_ucg(UCPI_UBH);
610 612
611 if (goal == 0) { 613 if (goal == 0) {
@@ -662,7 +664,7 @@ static unsigned ufs_bitmap_search (struct super_block * sb,
662 UFSD(("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count)) 664 UFSD(("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count))
663 665
664 uspi = UFS_SB(sb)->s_uspi; 666 uspi = UFS_SB(sb)->s_uspi;
665 usb1 = ubh_get_usb_first (USPI_UBH); 667 usb1 = ubh_get_usb_first (uspi);
666 ucg = ubh_get_ucg(UCPI_UBH); 668 ucg = ubh_get_ucg(UCPI_UBH);
667 669
668 if (goal) 670 if (goal)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 0938945b9cbc..c7a47ed4f430 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -72,7 +72,7 @@ void ufs_free_inode (struct inode * inode)
72 72
73 sb = inode->i_sb; 73 sb = inode->i_sb;
74 uspi = UFS_SB(sb)->s_uspi; 74 uspi = UFS_SB(sb)->s_uspi;
75 usb1 = ubh_get_usb_first(USPI_UBH); 75 usb1 = ubh_get_usb_first(uspi);
76 76
77 ino = inode->i_ino; 77 ino = inode->i_ino;
78 78
@@ -167,7 +167,7 @@ struct inode * ufs_new_inode(struct inode * dir, int mode)
167 ufsi = UFS_I(inode); 167 ufsi = UFS_I(inode);
168 sbi = UFS_SB(sb); 168 sbi = UFS_SB(sb);
169 uspi = sbi->s_uspi; 169 uspi = sbi->s_uspi;
170 usb1 = ubh_get_usb_first(USPI_UBH); 170 usb1 = ubh_get_usb_first(uspi);
171 171
172 lock_super (sb); 172 lock_super (sb);
173 173
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 55f4aa16e3fc..e0c04e36a051 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -61,7 +61,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
61 int n = 0; 61 int n = 0;
62 62
63 63
64 UFSD(("ptrs=uspi->s_apb = %d,double_blocks=%d \n",ptrs,double_blocks)); 64 UFSD(("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks));
65 if (i_block < 0) { 65 if (i_block < 0) {
66 ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0"); 66 ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0");
67 } else if (i_block < direct_blocks) { 67 } else if (i_block < direct_blocks) {
@@ -104,7 +104,7 @@ u64 ufs_frag_map(struct inode *inode, sector_t frag)
104 unsigned flags = UFS_SB(sb)->s_flags; 104 unsigned flags = UFS_SB(sb)->s_flags;
105 u64 temp = 0L; 105 u64 temp = 0L;
106 106
107 UFSD((": frag = %lu depth = %d\n",frag,depth)); 107 UFSD((": frag = %llu depth = %d\n", (unsigned long long)frag, depth));
108 UFSD((": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask)); 108 UFSD((": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask));
109 109
110 if (depth == 0) 110 if (depth == 0)
@@ -365,9 +365,10 @@ repeat:
365 sync_dirty_buffer(bh); 365 sync_dirty_buffer(bh);
366 inode->i_ctime = CURRENT_TIME_SEC; 366 inode->i_ctime = CURRENT_TIME_SEC;
367 mark_inode_dirty(inode); 367 mark_inode_dirty(inode);
368 UFSD(("result %u\n", tmp + blockoff));
368out: 369out:
369 brelse (bh); 370 brelse (bh);
370 UFSD(("EXIT, result %u\n", tmp + blockoff)) 371 UFSD(("EXIT\n"));
371 return result; 372 return result;
372} 373}
373 374
@@ -386,7 +387,7 @@ static int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buf
386 387
387 if (!create) { 388 if (!create) {
388 phys64 = ufs_frag_map(inode, fragment); 389 phys64 = ufs_frag_map(inode, fragment);
389 UFSD(("phys64 = %lu \n",phys64)); 390 UFSD(("phys64 = %llu \n",phys64));
390 if (phys64) 391 if (phys64)
391 map_bh(bh_result, sb, phys64); 392 map_bh(bh_result, sb, phys64);
392 return 0; 393 return 0;
@@ -401,7 +402,7 @@ static int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buf
401 402
402 lock_kernel(); 403 lock_kernel();
403 404
404 UFSD(("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment)) 405 UFSD(("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment))
405 if (fragment < 0) 406 if (fragment < 0)
406 goto abort_negative; 407 goto abort_negative;
407 if (fragment > 408 if (fragment >
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 54828ebcf1ba..d4aacee593ff 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -221,7 +221,7 @@ void ufs_error (struct super_block * sb, const char * function,
221 va_list args; 221 va_list args;
222 222
223 uspi = UFS_SB(sb)->s_uspi; 223 uspi = UFS_SB(sb)->s_uspi;
224 usb1 = ubh_get_usb_first(USPI_UBH); 224 usb1 = ubh_get_usb_first(uspi);
225 225
226 if (!(sb->s_flags & MS_RDONLY)) { 226 if (!(sb->s_flags & MS_RDONLY)) {
227 usb1->fs_clean = UFS_FSBAD; 227 usb1->fs_clean = UFS_FSBAD;
@@ -253,7 +253,7 @@ void ufs_panic (struct super_block * sb, const char * function,
253 va_list args; 253 va_list args;
254 254
255 uspi = UFS_SB(sb)->s_uspi; 255 uspi = UFS_SB(sb)->s_uspi;
256 usb1 = ubh_get_usb_first(USPI_UBH); 256 usb1 = ubh_get_usb_first(uspi);
257 257
258 if (!(sb->s_flags & MS_RDONLY)) { 258 if (!(sb->s_flags & MS_RDONLY)) {
259 usb1->fs_clean = UFS_FSBAD; 259 usb1->fs_clean = UFS_FSBAD;
@@ -420,21 +420,18 @@ static int ufs_read_cylinder_structures (struct super_block *sb) {
420 if (i + uspi->s_fpb > blks) 420 if (i + uspi->s_fpb > blks)
421 size = (blks - i) * uspi->s_fsize; 421 size = (blks - i) * uspi->s_fsize;
422 422
423 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 423 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
424 ubh = ubh_bread(sb, 424 ubh = ubh_bread(sb,
425 fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_csaddr) + i, size); 425 fs64_to_cpu(sb, usb->fs_u11.fs_u2.fs_csaddr) + i, size);
426 if (!ubh) 426 else
427 goto failed;
428 ubh_ubhcpymem (space, ubh, size);
429 sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space;
430 }
431 else {
432 ubh = ubh_bread(sb, uspi->s_csaddr + i, size); 427 ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
433 if (!ubh) 428
434 goto failed; 429 if (!ubh)
435 ubh_ubhcpymem(space, ubh, size); 430 goto failed;
436 sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space; 431
437 } 432 ubh_ubhcpymem (space, ubh, size);
433 sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space;
434
438 space += size; 435 space += size;
439 ubh_brelse (ubh); 436 ubh_brelse (ubh);
440 ubh = NULL; 437 ubh = NULL;
@@ -539,6 +536,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
539 struct inode *inode; 536 struct inode *inode;
540 unsigned block_size, super_block_size; 537 unsigned block_size, super_block_size;
541 unsigned flags; 538 unsigned flags;
539 unsigned super_block_offset;
542 540
543 uspi = NULL; 541 uspi = NULL;
544 ubh = NULL; 542 ubh = NULL;
@@ -586,10 +584,11 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
586 if (!uspi) 584 if (!uspi)
587 goto failed; 585 goto failed;
588 586
587 super_block_offset=UFS_SBLOCK;
588
589 /* Keep 2Gig file limit. Some UFS variants need to override 589 /* Keep 2Gig file limit. Some UFS variants need to override
590 this but as I don't know which I'll let those in the know loosen 590 this but as I don't know which I'll let those in the know loosen
591 the rules */ 591 the rules */
592
593 switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) { 592 switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
594 case UFS_MOUNT_UFSTYPE_44BSD: 593 case UFS_MOUNT_UFSTYPE_44BSD:
595 UFSD(("ufstype=44bsd\n")) 594 UFSD(("ufstype=44bsd\n"))
@@ -601,7 +600,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
601 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; 600 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
602 break; 601 break;
603 case UFS_MOUNT_UFSTYPE_UFS2: 602 case UFS_MOUNT_UFSTYPE_UFS2:
604 UFSD(("ufstype=ufs2\n")) 603 UFSD(("ufstype=ufs2\n"));
604 super_block_offset=SBLOCK_UFS2;
605 uspi->s_fsize = block_size = 512; 605 uspi->s_fsize = block_size = 512;
606 uspi->s_fmask = ~(512 - 1); 606 uspi->s_fmask = ~(512 - 1);
607 uspi->s_fshift = 9; 607 uspi->s_fshift = 9;
@@ -725,19 +725,16 @@ again:
725 /* 725 /*
726 * read ufs super block from device 726 * read ufs super block from device
727 */ 727 */
728 if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 728
729 ubh = ubh_bread_uspi(uspi, sb, uspi->s_sbbase + SBLOCK_UFS2/block_size, super_block_size); 729 ubh = ubh_bread_uspi(uspi, sb, uspi->s_sbbase + super_block_offset/block_size, super_block_size);
730 } 730
731 else {
732 ubh = ubh_bread_uspi(uspi, sb, uspi->s_sbbase + UFS_SBLOCK/block_size, super_block_size);
733 }
734 if (!ubh) 731 if (!ubh)
735 goto failed; 732 goto failed;
736 733
737 734
738 usb1 = ubh_get_usb_first(USPI_UBH); 735 usb1 = ubh_get_usb_first(uspi);
739 usb2 = ubh_get_usb_second(USPI_UBH); 736 usb2 = ubh_get_usb_second(uspi);
740 usb3 = ubh_get_usb_third(USPI_UBH); 737 usb3 = ubh_get_usb_third(uspi);
741 usb = (struct ufs_super_block *) 738 usb = (struct ufs_super_block *)
742 ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ; 739 ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ;
743 740
@@ -1006,8 +1003,8 @@ static void ufs_write_super (struct super_block *sb) {
1006 UFSD(("ENTER\n")) 1003 UFSD(("ENTER\n"))
1007 flags = UFS_SB(sb)->s_flags; 1004 flags = UFS_SB(sb)->s_flags;
1008 uspi = UFS_SB(sb)->s_uspi; 1005 uspi = UFS_SB(sb)->s_uspi;
1009 usb1 = ubh_get_usb_first(USPI_UBH); 1006 usb1 = ubh_get_usb_first(uspi);
1010 usb3 = ubh_get_usb_third(USPI_UBH); 1007 usb3 = ubh_get_usb_third(uspi);
1011 1008
1012 if (!(sb->s_flags & MS_RDONLY)) { 1009 if (!(sb->s_flags & MS_RDONLY)) {
1013 usb1->fs_time = cpu_to_fs32(sb, get_seconds()); 1010 usb1->fs_time = cpu_to_fs32(sb, get_seconds());
@@ -1049,8 +1046,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1049 1046
1050 uspi = UFS_SB(sb)->s_uspi; 1047 uspi = UFS_SB(sb)->s_uspi;
1051 flags = UFS_SB(sb)->s_flags; 1048 flags = UFS_SB(sb)->s_flags;
1052 usb1 = ubh_get_usb_first(USPI_UBH); 1049 usb1 = ubh_get_usb_first(uspi);
1053 usb3 = ubh_get_usb_third(USPI_UBH); 1050 usb3 = ubh_get_usb_third(uspi);
1054 1051
1055 /* 1052 /*
1056 * Allow the "check" option to be passed as a remount option. 1053 * Allow the "check" option to be passed as a remount option.
@@ -1124,7 +1121,7 @@ static int ufs_statfs (struct super_block *sb, struct kstatfs *buf)
1124 lock_kernel(); 1121 lock_kernel();
1125 1122
1126 uspi = UFS_SB(sb)->s_uspi; 1123 uspi = UFS_SB(sb)->s_uspi;
1127 usb1 = ubh_get_usb_first (USPI_UBH); 1124 usb1 = ubh_get_usb_first (uspi);
1128 usb = (struct ufs_super_block *) 1125 usb = (struct ufs_super_block *)
1129 ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ; 1126 ((struct ufs_buffer_head *)uspi)->bh[0]->b_data ;
1130 1127
@@ -1275,7 +1272,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
1275 size_t towrite = len; 1272 size_t towrite = len;
1276 struct buffer_head *bh; 1273 struct buffer_head *bh;
1277 1274
1278 down(&inode->i_sem); 1275 mutex_lock(&inode->i_mutex);
1279 while (towrite > 0) { 1276 while (towrite > 0) {
1280 tocopy = sb->s_blocksize - offset < towrite ? 1277 tocopy = sb->s_blocksize - offset < towrite ?
1281 sb->s_blocksize - offset : towrite; 1278 sb->s_blocksize - offset : towrite;
@@ -1296,14 +1293,16 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
1296 blk++; 1293 blk++;
1297 } 1294 }
1298out: 1295out:
1299 if (len == towrite) 1296 if (len == towrite) {
1297 mutex_unlock(&inode->i_mutex);
1300 return err; 1298 return err;
1299 }
1301 if (inode->i_size < off+len-towrite) 1300 if (inode->i_size < off+len-towrite)
1302 i_size_write(inode, off+len-towrite); 1301 i_size_write(inode, off+len-towrite);
1303 inode->i_version++; 1302 inode->i_version++;
1304 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 1303 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
1305 mark_inode_dirty(inode); 1304 mark_inode_dirty(inode);
1306 up(&inode->i_sem); 1305 mutex_unlock(&inode->i_mutex);
1307 return len - towrite; 1306 return len - towrite;
1308} 1307}
1309 1308
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index b2640076679a..48d6d9bcc157 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -249,18 +249,28 @@ extern void _ubh_memcpyubh_(struct ufs_sb_private_info *, struct ufs_buffer_head
249 249
250 250
251/* 251/*
252 * macros to get important structures from ufs_buffer_head 252 * macros and inline function to get important structures from ufs_sb_private_info
253 */ 253 */
254#define ubh_get_usb_first(ubh) \
255 ((struct ufs_super_block_first *)((ubh)->bh[0]->b_data))
256 254
257#define ubh_get_usb_second(ubh) \ 255static inline void *get_usb_offset(struct ufs_sb_private_info *uspi,
258 ((struct ufs_super_block_second *)(ubh)-> \ 256 unsigned int offset)
259 bh[UFS_SECTOR_SIZE >> uspi->s_fshift]->b_data + (UFS_SECTOR_SIZE & ~uspi->s_fmask)) 257{
258 unsigned int index;
259
260 index = offset >> uspi->s_fshift;
261 offset &= ~uspi->s_fmask;
262 return uspi->s_ubh.bh[index]->b_data + offset;
263}
264
265#define ubh_get_usb_first(uspi) \
266 ((struct ufs_super_block_first *)get_usb_offset((uspi), 0))
267
268#define ubh_get_usb_second(uspi) \
269 ((struct ufs_super_block_second *)get_usb_offset((uspi), UFS_SECTOR_SIZE))
270
271#define ubh_get_usb_third(uspi) \
272 ((struct ufs_super_block_third *)get_usb_offset((uspi), 2*UFS_SECTOR_SIZE))
260 273
261#define ubh_get_usb_third(ubh) \
262 ((struct ufs_super_block_third *)((ubh)-> \
263 bh[UFS_SECTOR_SIZE*2 >> uspi->s_fshift]->b_data + (UFS_SECTOR_SIZE*2 & ~uspi->s_fmask)))
264 274
265#define ubh_get_ucg(ubh) \ 275#define ubh_get_ucg(ubh) \
266 ((struct ufs_cylinder_group *)((ubh)->bh[0]->b_data)) 276 ((struct ufs_cylinder_group *)((ubh)->bh[0]->b_data))
diff --git a/fs/xattr.c b/fs/xattr.c
index bcc2156d4d28..80eca7d3d69f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -19,6 +19,149 @@
19#include <linux/fsnotify.h> 19#include <linux/fsnotify.h>
20#include <asm/uaccess.h> 20#include <asm/uaccess.h>
21 21
22
23/*
24 * Check permissions for extended attribute access. This is a bit complicated
25 * because different namespaces have very different rules.
26 */
27static int
28xattr_permission(struct inode *inode, const char *name, int mask)
29{
30 /*
31 * We can never set or remove an extended attribute on a read-only
32 * filesystem or on an immutable / append-only inode.
33 */
34 if (mask & MAY_WRITE) {
35 if (IS_RDONLY(inode))
36 return -EROFS;
37 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
38 return -EPERM;
39 }
40
41 /*
42 * No restriction for security.* and system.* from the VFS. Decision
43 * on these is left to the underlying filesystem / security module.
44 */
45 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
46 !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
47 return 0;
48
49 /*
50 * The trusted.* namespace can only accessed by a privilegued user.
51 */
52 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
53 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
54
55 if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
56 if (!S_ISREG(inode->i_mode) &&
57 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
58 return -EPERM;
59 }
60
61 return permission(inode, mask, NULL);
62}
63
64int
65vfs_setxattr(struct dentry *dentry, char *name, void *value,
66 size_t size, int flags)
67{
68 struct inode *inode = dentry->d_inode;
69 int error;
70
71 error = xattr_permission(inode, name, MAY_WRITE);
72 if (error)
73 return error;
74
75 mutex_lock(&inode->i_mutex);
76 error = security_inode_setxattr(dentry, name, value, size, flags);
77 if (error)
78 goto out;
79 error = -EOPNOTSUPP;
80 if (inode->i_op->setxattr) {
81 error = inode->i_op->setxattr(dentry, name, value, size, flags);
82 if (!error) {
83 fsnotify_xattr(dentry);
84 security_inode_post_setxattr(dentry, name, value,
85 size, flags);
86 }
87 } else if (!strncmp(name, XATTR_SECURITY_PREFIX,
88 XATTR_SECURITY_PREFIX_LEN)) {
89 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
90 error = security_inode_setsecurity(inode, suffix, value,
91 size, flags);
92 if (!error)
93 fsnotify_xattr(dentry);
94 }
95out:
96 mutex_unlock(&inode->i_mutex);
97 return error;
98}
99EXPORT_SYMBOL_GPL(vfs_setxattr);
100
101ssize_t
102vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
103{
104 struct inode *inode = dentry->d_inode;
105 int error;
106
107 error = xattr_permission(inode, name, MAY_READ);
108 if (error)
109 return error;
110
111 error = security_inode_getxattr(dentry, name);
112 if (error)
113 return error;
114
115 if (inode->i_op->getxattr)
116 error = inode->i_op->getxattr(dentry, name, value, size);
117 else
118 error = -EOPNOTSUPP;
119
120 if (!strncmp(name, XATTR_SECURITY_PREFIX,
121 XATTR_SECURITY_PREFIX_LEN)) {
122 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
123 int ret = security_inode_getsecurity(inode, suffix, value,
124 size, error);
125 /*
126 * Only overwrite the return value if a security module
127 * is actually active.
128 */
129 if (ret != -EOPNOTSUPP)
130 error = ret;
131 }
132
133 return error;
134}
135EXPORT_SYMBOL_GPL(vfs_getxattr);
136
137int
138vfs_removexattr(struct dentry *dentry, char *name)
139{
140 struct inode *inode = dentry->d_inode;
141 int error;
142
143 if (!inode->i_op->removexattr)
144 return -EOPNOTSUPP;
145
146 error = xattr_permission(inode, name, MAY_WRITE);
147 if (error)
148 return error;
149
150 error = security_inode_removexattr(dentry, name);
151 if (error)
152 return error;
153
154 mutex_lock(&inode->i_mutex);
155 error = inode->i_op->removexattr(dentry, name);
156 mutex_unlock(&inode->i_mutex);
157
158 if (!error)
159 fsnotify_xattr(dentry);
160 return error;
161}
162EXPORT_SYMBOL_GPL(vfs_removexattr);
163
164
22/* 165/*
23 * Extended attribute SET operations 166 * Extended attribute SET operations
24 */ 167 */
@@ -51,29 +194,7 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
51 } 194 }
52 } 195 }
53 196
54 down(&d->d_inode->i_sem); 197 error = vfs_setxattr(d, kname, kvalue, size, flags);
55 error = security_inode_setxattr(d, kname, kvalue, size, flags);
56 if (error)
57 goto out;
58 error = -EOPNOTSUPP;
59 if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
60 error = d->d_inode->i_op->setxattr(d, kname, kvalue,
61 size, flags);
62 if (!error) {
63 fsnotify_xattr(d);
64 security_inode_post_setxattr(d, kname, kvalue,
65 size, flags);
66 }
67 } else if (!strncmp(kname, XATTR_SECURITY_PREFIX,
68 sizeof XATTR_SECURITY_PREFIX - 1)) {
69 const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
70 error = security_inode_setsecurity(d->d_inode, suffix, kvalue,
71 size, flags);
72 if (!error)
73 fsnotify_xattr(d);
74 }
75out:
76 up(&d->d_inode->i_sem);
77 kfree(kvalue); 198 kfree(kvalue);
78 return error; 199 return error;
79} 200}
@@ -147,22 +268,7 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
147 return -ENOMEM; 268 return -ENOMEM;
148 } 269 }
149 270
150 error = security_inode_getxattr(d, kname); 271 error = vfs_getxattr(d, kname, kvalue, size);
151 if (error)
152 goto out;
153 error = -EOPNOTSUPP;
154 if (d->d_inode->i_op && d->d_inode->i_op->getxattr)
155 error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
156
157 if (!strncmp(kname, XATTR_SECURITY_PREFIX,
158 sizeof XATTR_SECURITY_PREFIX - 1)) {
159 const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1;
160 int rv = security_inode_getsecurity(d->d_inode, suffix, kvalue,
161 size, error);
162 /* Security module active: overwrite error value */
163 if (rv != -EOPNOTSUPP)
164 error = rv;
165 }
166 if (error > 0) { 272 if (error > 0) {
167 if (size && copy_to_user(value, kvalue, error)) 273 if (size && copy_to_user(value, kvalue, error))
168 error = -EFAULT; 274 error = -EFAULT;
@@ -171,7 +277,6 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
171 than XATTR_SIZE_MAX bytes. Not possible. */ 277 than XATTR_SIZE_MAX bytes. Not possible. */
172 error = -E2BIG; 278 error = -E2BIG;
173 } 279 }
174out:
175 kfree(kvalue); 280 kfree(kvalue);
176 return error; 281 return error;
177} 282}
@@ -318,19 +423,7 @@ removexattr(struct dentry *d, char __user *name)
318 if (error < 0) 423 if (error < 0)
319 return error; 424 return error;
320 425
321 error = -EOPNOTSUPP; 426 return vfs_removexattr(d, kname);
322 if (d->d_inode->i_op && d->d_inode->i_op->removexattr) {
323 error = security_inode_removexattr(d, kname);
324 if (error)
325 goto out;
326 down(&d->d_inode->i_sem);
327 error = d->d_inode->i_op->removexattr(d, kname);
328 up(&d->d_inode->i_sem);
329 if (!error)
330 fsnotify_xattr(d);
331 }
332out:
333 return error;
334} 427}
335 428
336asmlinkage long 429asmlinkage long
diff --git a/fs/xfs/Kbuild b/fs/xfs/Kbuild
new file mode 100644
index 000000000000..2566e96706f1
--- /dev/null
+++ b/fs/xfs/Kbuild
@@ -0,0 +1,6 @@
1#
2# The xfs people like to share Makefile with 2.6 and 2.4.
3# Utilise a file named Kbuild, which takes precedence over Makefile.
4#
5
6include $(srctree)/$(obj)/Makefile-linux-2.6
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
index ce773d89a923..2a88d56c4dc2 100644
--- a/fs/xfs/linux-2.6/mutex.h
+++ b/fs/xfs/linux-2.6/mutex.h
@@ -18,22 +18,8 @@
18#ifndef __XFS_SUPPORT_MUTEX_H__ 18#ifndef __XFS_SUPPORT_MUTEX_H__
19#define __XFS_SUPPORT_MUTEX_H__ 19#define __XFS_SUPPORT_MUTEX_H__
20 20
21#include <linux/spinlock.h> 21#include <linux/mutex.h>
22#include <asm/semaphore.h>
23 22
24/* 23typedef struct mutex mutex_t;
25 * Map the mutex'es from IRIX to Linux semaphores.
26 *
27 * Destroy just simply initializes to -99 which should block all other
28 * callers.
29 */
30#define MUTEX_DEFAULT 0x0
31typedef struct semaphore mutex_t;
32
33#define mutex_init(lock, type, name) sema_init(lock, 1)
34#define mutex_destroy(lock) sema_init(lock, -99)
35#define mutex_lock(lock, num) down(lock)
36#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1)
37#define mutex_unlock(lock) up(lock)
38 24
39#endif /* __XFS_SUPPORT_MUTEX_H__ */ 25#endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 94d3cdfbf9b8..120626789406 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,11 +40,10 @@
40#include "xfs_rw.h" 40#include "xfs_rw.h"
41#include "xfs_iomap.h" 41#include "xfs_iomap.h"
42#include <linux/mpage.h> 42#include <linux/mpage.h>
43#include <linux/pagevec.h>
43#include <linux/writeback.h> 44#include <linux/writeback.h>
44 45
45STATIC void xfs_count_page_state(struct page *, int *, int *, int *); 46STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
46STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
47 struct writeback_control *wbc, void *, int, int);
48 47
49#if defined(XFS_RW_TRACE) 48#if defined(XFS_RW_TRACE)
50void 49void
@@ -55,17 +54,15 @@ xfs_page_trace(
55 int mask) 54 int mask)
56{ 55{
57 xfs_inode_t *ip; 56 xfs_inode_t *ip;
58 bhv_desc_t *bdp;
59 vnode_t *vp = LINVFS_GET_VP(inode); 57 vnode_t *vp = LINVFS_GET_VP(inode);
60 loff_t isize = i_size_read(inode); 58 loff_t isize = i_size_read(inode);
61 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT; 59 loff_t offset = page_offset(page);
62 int delalloc = -1, unmapped = -1, unwritten = -1; 60 int delalloc = -1, unmapped = -1, unwritten = -1;
63 61
64 if (page_has_buffers(page)) 62 if (page_has_buffers(page))
65 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); 63 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
66 64
67 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); 65 ip = xfs_vtoi(vp);
68 ip = XFS_BHVTOI(bdp);
69 if (!ip->i_rwtrace) 66 if (!ip->i_rwtrace)
70 return; 67 return;
71 68
@@ -103,15 +100,56 @@ xfs_finish_ioend(
103 queue_work(xfsdatad_workqueue, &ioend->io_work); 100 queue_work(xfsdatad_workqueue, &ioend->io_work);
104} 101}
105 102
103/*
104 * We're now finished for good with this ioend structure.
105 * Update the page state via the associated buffer_heads,
106 * release holds on the inode and bio, and finally free
107 * up memory. Do not use the ioend after this.
108 */
106STATIC void 109STATIC void
107xfs_destroy_ioend( 110xfs_destroy_ioend(
108 xfs_ioend_t *ioend) 111 xfs_ioend_t *ioend)
109{ 112{
113 struct buffer_head *bh, *next;
114
115 for (bh = ioend->io_buffer_head; bh; bh = next) {
116 next = bh->b_private;
117 bh->b_end_io(bh, ioend->io_uptodate);
118 }
119
110 vn_iowake(ioend->io_vnode); 120 vn_iowake(ioend->io_vnode);
111 mempool_free(ioend, xfs_ioend_pool); 121 mempool_free(ioend, xfs_ioend_pool);
112} 122}
113 123
114/* 124/*
125 * Buffered IO write completion for delayed allocate extents.
126 * TODO: Update ondisk isize now that we know the file data
127 * has been flushed (i.e. the notorious "NULL file" problem).
128 */
129STATIC void
130xfs_end_bio_delalloc(
131 void *data)
132{
133 xfs_ioend_t *ioend = data;
134
135 xfs_destroy_ioend(ioend);
136}
137
138/*
139 * Buffered IO write completion for regular, written extents.
140 */
141STATIC void
142xfs_end_bio_written(
143 void *data)
144{
145 xfs_ioend_t *ioend = data;
146
147 xfs_destroy_ioend(ioend);
148}
149
150/*
151 * IO write completion for unwritten extents.
152 *
115 * Issue transactions to convert a buffer range from unwritten 153 * Issue transactions to convert a buffer range from unwritten
116 * to written extents. 154 * to written extents.
117 */ 155 */
@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
123 vnode_t *vp = ioend->io_vnode; 161 vnode_t *vp = ioend->io_vnode;
124 xfs_off_t offset = ioend->io_offset; 162 xfs_off_t offset = ioend->io_offset;
125 size_t size = ioend->io_size; 163 size_t size = ioend->io_size;
126 struct buffer_head *bh, *next;
127 int error; 164 int error;
128 165
129 if (ioend->io_uptodate) 166 if (ioend->io_uptodate)
130 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); 167 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
131
132 /* ioend->io_buffer_head is only non-NULL for buffered I/O */
133 for (bh = ioend->io_buffer_head; bh; bh = next) {
134 next = bh->b_private;
135
136 bh->b_end_io = NULL;
137 clear_buffer_unwritten(bh);
138 end_buffer_async_write(bh, ioend->io_uptodate);
139 }
140
141 xfs_destroy_ioend(ioend); 168 xfs_destroy_ioend(ioend);
142} 169}
143 170
@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
149 */ 176 */
150STATIC xfs_ioend_t * 177STATIC xfs_ioend_t *
151xfs_alloc_ioend( 178xfs_alloc_ioend(
152 struct inode *inode) 179 struct inode *inode,
180 unsigned int type)
153{ 181{
154 xfs_ioend_t *ioend; 182 xfs_ioend_t *ioend;
155 183
@@ -162,45 +190,25 @@ xfs_alloc_ioend(
162 */ 190 */
163 atomic_set(&ioend->io_remaining, 1); 191 atomic_set(&ioend->io_remaining, 1);
164 ioend->io_uptodate = 1; /* cleared if any I/O fails */ 192 ioend->io_uptodate = 1; /* cleared if any I/O fails */
193 ioend->io_list = NULL;
194 ioend->io_type = type;
165 ioend->io_vnode = LINVFS_GET_VP(inode); 195 ioend->io_vnode = LINVFS_GET_VP(inode);
166 ioend->io_buffer_head = NULL; 196 ioend->io_buffer_head = NULL;
197 ioend->io_buffer_tail = NULL;
167 atomic_inc(&ioend->io_vnode->v_iocount); 198 atomic_inc(&ioend->io_vnode->v_iocount);
168 ioend->io_offset = 0; 199 ioend->io_offset = 0;
169 ioend->io_size = 0; 200 ioend->io_size = 0;
170 201
171 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend); 202 if (type == IOMAP_UNWRITTEN)
203 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
204 else if (type == IOMAP_DELAY)
205 INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
206 else
207 INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
172 208
173 return ioend; 209 return ioend;
174} 210}
175 211
176void
177linvfs_unwritten_done(
178 struct buffer_head *bh,
179 int uptodate)
180{
181 xfs_ioend_t *ioend = bh->b_private;
182 static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;
183 unsigned long flags;
184
185 ASSERT(buffer_unwritten(bh));
186 bh->b_end_io = NULL;
187
188 if (!uptodate)
189 ioend->io_uptodate = 0;
190
191 /*
192 * Deep magic here. We reuse b_private in the buffer_heads to build
193 * a chain for completing the I/O from user context after we've issued
194 * a transaction to convert the unwritten extent.
195 */
196 spin_lock_irqsave(&unwritten_done_lock, flags);
197 bh->b_private = ioend->io_buffer_head;
198 ioend->io_buffer_head = bh;
199 spin_unlock_irqrestore(&unwritten_done_lock, flags);
200
201 xfs_finish_ioend(ioend);
202}
203
204STATIC int 212STATIC int
205xfs_map_blocks( 213xfs_map_blocks(
206 struct inode *inode, 214 struct inode *inode,
@@ -218,138 +226,283 @@ xfs_map_blocks(
218 return -error; 226 return -error;
219} 227}
220 228
229STATIC inline int
230xfs_iomap_valid(
231 xfs_iomap_t *iomapp,
232 loff_t offset)
233{
234 return offset >= iomapp->iomap_offset &&
235 offset < iomapp->iomap_offset + iomapp->iomap_bsize;
236}
237
221/* 238/*
222 * Finds the corresponding mapping in block @map array of the 239 * BIO completion handler for buffered IO.
223 * given @offset within a @page.
224 */ 240 */
225STATIC xfs_iomap_t * 241STATIC int
226xfs_offset_to_map( 242xfs_end_bio(
243 struct bio *bio,
244 unsigned int bytes_done,
245 int error)
246{
247 xfs_ioend_t *ioend = bio->bi_private;
248
249 if (bio->bi_size)
250 return 1;
251
252 ASSERT(ioend);
253 ASSERT(atomic_read(&bio->bi_cnt) >= 1);
254
255 /* Toss bio and pass work off to an xfsdatad thread */
256 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
257 ioend->io_uptodate = 0;
258 bio->bi_private = NULL;
259 bio->bi_end_io = NULL;
260
261 bio_put(bio);
262 xfs_finish_ioend(ioend);
263 return 0;
264}
265
266STATIC void
267xfs_submit_ioend_bio(
268 xfs_ioend_t *ioend,
269 struct bio *bio)
270{
271 atomic_inc(&ioend->io_remaining);
272
273 bio->bi_private = ioend;
274 bio->bi_end_io = xfs_end_bio;
275
276 submit_bio(WRITE, bio);
277 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
278 bio_put(bio);
279}
280
281STATIC struct bio *
282xfs_alloc_ioend_bio(
283 struct buffer_head *bh)
284{
285 struct bio *bio;
286 int nvecs = bio_get_nr_vecs(bh->b_bdev);
287
288 do {
289 bio = bio_alloc(GFP_NOIO, nvecs);
290 nvecs >>= 1;
291 } while (!bio);
292
293 ASSERT(bio->bi_private == NULL);
294 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
295 bio->bi_bdev = bh->b_bdev;
296 bio_get(bio);
297 return bio;
298}
299
300STATIC void
301xfs_start_buffer_writeback(
302 struct buffer_head *bh)
303{
304 ASSERT(buffer_mapped(bh));
305 ASSERT(buffer_locked(bh));
306 ASSERT(!buffer_delay(bh));
307 ASSERT(!buffer_unwritten(bh));
308
309 mark_buffer_async_write(bh);
310 set_buffer_uptodate(bh);
311 clear_buffer_dirty(bh);
312}
313
314STATIC void
315xfs_start_page_writeback(
227 struct page *page, 316 struct page *page,
228 xfs_iomap_t *iomapp, 317 struct writeback_control *wbc,
229 unsigned long offset) 318 int clear_dirty,
319 int buffers)
320{
321 ASSERT(PageLocked(page));
322 ASSERT(!PageWriteback(page));
323 set_page_writeback(page);
324 if (clear_dirty)
325 clear_page_dirty(page);
326 unlock_page(page);
327 if (!buffers) {
328 end_page_writeback(page);
329 wbc->pages_skipped++; /* We didn't write this page */
330 }
331}
332
333static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
334{
335 return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
336}
337
338/*
339 * Submit all of the bios for all of the ioends we have saved up, covering the
340 * initial writepage page and also any probed pages.
341 *
342 * Because we may have multiple ioends spanning a page, we need to start
343 * writeback on all the buffers before we submit them for I/O. If we mark the
344 * buffers as we go, then we can end up with a page that has only some
345 * buffers marked async write, and I/O completion can occur before we mark
346 * the other buffers async write.
347 *
348 * The end result of this is that we trip a bug in end_page_writeback() because
349 * we call it twice for the one page as the code in end_buffer_async_write()
350 * assumes that all buffers on the page are started at the same time.
351 *
352 * The fix is two passes across the ioend list - one to start writeback on the
353 * bufferheads, and then a second one to submit them for I/O.
354 */
355STATIC void
356xfs_submit_ioend(
357 xfs_ioend_t *ioend)
358{
359 xfs_ioend_t *head = ioend;
360 xfs_ioend_t *next;
361 struct buffer_head *bh;
362 struct bio *bio;
363 sector_t lastblock = 0;
364
365 /* Pass 1 - start writeback */
366 do {
367 next = ioend->io_list;
368 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
369 xfs_start_buffer_writeback(bh);
370 }
371 } while ((ioend = next) != NULL);
372
373 /* Pass 2 - submit I/O */
374 ioend = head;
375 do {
376 next = ioend->io_list;
377 bio = NULL;
378
379 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
380
381 if (!bio) {
382 retry:
383 bio = xfs_alloc_ioend_bio(bh);
384 } else if (bh->b_blocknr != lastblock + 1) {
385 xfs_submit_ioend_bio(ioend, bio);
386 goto retry;
387 }
388
389 if (bio_add_buffer(bio, bh) != bh->b_size) {
390 xfs_submit_ioend_bio(ioend, bio);
391 goto retry;
392 }
393
394 lastblock = bh->b_blocknr;
395 }
396 if (bio)
397 xfs_submit_ioend_bio(ioend, bio);
398 xfs_finish_ioend(ioend);
399 } while ((ioend = next) != NULL);
400}
401
402/*
403 * Cancel submission of all buffer_heads so far in this ioend.
404 * Toss the ioend too. Only ever called for the initial page
405 * in a writepage request, so only ever one page.
406 */
407STATIC void
408xfs_cancel_ioend(
409 xfs_ioend_t *ioend)
410{
411 xfs_ioend_t *next;
412 struct buffer_head *bh, *next_bh;
413
414 do {
415 next = ioend->io_list;
416 bh = ioend->io_buffer_head;
417 do {
418 next_bh = bh->b_private;
419 clear_buffer_async_write(bh);
420 unlock_buffer(bh);
421 } while ((bh = next_bh) != NULL);
422
423 vn_iowake(ioend->io_vnode);
424 mempool_free(ioend, xfs_ioend_pool);
425 } while ((ioend = next) != NULL);
426}
427
428/*
429 * Test to see if we've been building up a completion structure for
430 * earlier buffers -- if so, we try to append to this ioend if we
431 * can, otherwise we finish off any current ioend and start another.
432 * The ioend that now holds @bh is passed back through @result.
433 */
434STATIC void
435xfs_add_to_ioend(
436 struct inode *inode,
437 struct buffer_head *bh,
438 xfs_off_t offset,
439 unsigned int type,
440 xfs_ioend_t **result,
441 int need_ioend)
230{ 442{
231 loff_t full_offset; /* offset from start of file */ 443 xfs_ioend_t *ioend = *result;
232 444
233 ASSERT(offset < PAGE_CACHE_SIZE); 445 if (!ioend || need_ioend || type != ioend->io_type) {
446 xfs_ioend_t *previous = *result;
234 447
235 full_offset = page->index; /* NB: using 64bit number */ 448 ioend = xfs_alloc_ioend(inode, type);
236 full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */ 449 ioend->io_offset = offset;
237 full_offset += offset; /* offset from page start */ 450 ioend->io_buffer_head = bh;
451 ioend->io_buffer_tail = bh;
452 if (previous)
453 previous->io_list = ioend;
454 *result = ioend;
455 } else {
456 ioend->io_buffer_tail->b_private = bh;
457 ioend->io_buffer_tail = bh;
458 }
238 459
239 if (full_offset < iomapp->iomap_offset) 460 bh->b_private = NULL;
240 return NULL; 461 ioend->io_size += bh->b_size;
241 if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
242 return iomapp;
243 return NULL;
244} 462}
245 463
246STATIC void 464STATIC void
247xfs_map_at_offset( 465xfs_map_at_offset(
248 struct page *page,
249 struct buffer_head *bh, 466 struct buffer_head *bh,
250 unsigned long offset, 467 loff_t offset,
251 int block_bits, 468 int block_bits,
252 xfs_iomap_t *iomapp) 469 xfs_iomap_t *iomapp)
253{ 470{
254 xfs_daddr_t bn; 471 xfs_daddr_t bn;
255 loff_t delta;
256 int sector_shift; 472 int sector_shift;
257 473
258 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); 474 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
259 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); 475 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
260 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); 476 ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
261 477
262 delta = page->index;
263 delta <<= PAGE_CACHE_SHIFT;
264 delta += offset;
265 delta -= iomapp->iomap_offset;
266 delta >>= block_bits;
267
268 sector_shift = block_bits - BBSHIFT; 478 sector_shift = block_bits - BBSHIFT;
269 bn = iomapp->iomap_bn >> sector_shift; 479 bn = (iomapp->iomap_bn >> sector_shift) +
270 bn += delta; 480 ((offset - iomapp->iomap_offset) >> block_bits);
271 BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME)); 481
482 ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
272 ASSERT((bn << sector_shift) >= iomapp->iomap_bn); 483 ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
273 484
274 lock_buffer(bh); 485 lock_buffer(bh);
275 bh->b_blocknr = bn; 486 bh->b_blocknr = bn;
276 bh->b_bdev = iomapp->iomap_target->pbr_bdev; 487 bh->b_bdev = iomapp->iomap_target->bt_bdev;
277 set_buffer_mapped(bh); 488 set_buffer_mapped(bh);
278 clear_buffer_delay(bh); 489 clear_buffer_delay(bh);
490 clear_buffer_unwritten(bh);
279} 491}
280 492
281/* 493/*
282 * Look for a page at index which is unlocked and contains our 494 * Look for a page at index that is suitable for clustering.
283 * unwritten extent flagged buffers at its head. Returns page
284 * locked and with an extra reference count, and length of the
285 * unwritten extent component on this page that we can write,
286 * in units of filesystem blocks.
287 */
288STATIC struct page *
289xfs_probe_unwritten_page(
290 struct address_space *mapping,
291 pgoff_t index,
292 xfs_iomap_t *iomapp,
293 xfs_ioend_t *ioend,
294 unsigned long max_offset,
295 unsigned long *fsbs,
296 unsigned int bbits)
297{
298 struct page *page;
299
300 page = find_trylock_page(mapping, index);
301 if (!page)
302 return NULL;
303 if (PageWriteback(page))
304 goto out;
305
306 if (page->mapping && page_has_buffers(page)) {
307 struct buffer_head *bh, *head;
308 unsigned long p_offset = 0;
309
310 *fsbs = 0;
311 bh = head = page_buffers(page);
312 do {
313 if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
314 break;
315 if (!xfs_offset_to_map(page, iomapp, p_offset))
316 break;
317 if (p_offset >= max_offset)
318 break;
319 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
320 set_buffer_unwritten_io(bh);
321 bh->b_private = ioend;
322 p_offset += bh->b_size;
323 (*fsbs)++;
324 } while ((bh = bh->b_this_page) != head);
325
326 if (p_offset)
327 return page;
328 }
329
330out:
331 unlock_page(page);
332 return NULL;
333}
334
335/*
336 * Look for a page at index which is unlocked and not mapped
337 * yet - clustering for mmap write case.
338 */ 495 */
339STATIC unsigned int 496STATIC unsigned int
340xfs_probe_unmapped_page( 497xfs_probe_page(
341 struct address_space *mapping, 498 struct page *page,
342 pgoff_t index, 499 unsigned int pg_offset,
343 unsigned int pg_offset) 500 int mapped)
344{ 501{
345 struct page *page;
346 int ret = 0; 502 int ret = 0;
347 503
348 page = find_trylock_page(mapping, index);
349 if (!page)
350 return 0;
351 if (PageWriteback(page)) 504 if (PageWriteback(page))
352 goto out; 505 return 0;
353 506
354 if (page->mapping && PageDirty(page)) { 507 if (page->mapping && PageDirty(page)) {
355 if (page_has_buffers(page)) { 508 if (page_has_buffers(page)) {
@@ -357,79 +510,101 @@ xfs_probe_unmapped_page(
357 510
358 bh = head = page_buffers(page); 511 bh = head = page_buffers(page);
359 do { 512 do {
360 if (buffer_mapped(bh) || !buffer_uptodate(bh)) 513 if (!buffer_uptodate(bh))
514 break;
515 if (mapped != buffer_mapped(bh))
361 break; 516 break;
362 ret += bh->b_size; 517 ret += bh->b_size;
363 if (ret >= pg_offset) 518 if (ret >= pg_offset)
364 break; 519 break;
365 } while ((bh = bh->b_this_page) != head); 520 } while ((bh = bh->b_this_page) != head);
366 } else 521 } else
367 ret = PAGE_CACHE_SIZE; 522 ret = mapped ? 0 : PAGE_CACHE_SIZE;
368 } 523 }
369 524
370out:
371 unlock_page(page);
372 return ret; 525 return ret;
373} 526}
374 527
375STATIC unsigned int 528STATIC size_t
376xfs_probe_unmapped_cluster( 529xfs_probe_cluster(
377 struct inode *inode, 530 struct inode *inode,
378 struct page *startpage, 531 struct page *startpage,
379 struct buffer_head *bh, 532 struct buffer_head *bh,
380 struct buffer_head *head) 533 struct buffer_head *head,
534 int mapped)
381{ 535{
536 struct pagevec pvec;
382 pgoff_t tindex, tlast, tloff; 537 pgoff_t tindex, tlast, tloff;
383 unsigned int pg_offset, len, total = 0; 538 size_t total = 0;
384 struct address_space *mapping = inode->i_mapping; 539 int done = 0, i;
385 540
386 /* First sum forwards in this page */ 541 /* First sum forwards in this page */
387 do { 542 do {
388 if (buffer_mapped(bh)) 543 if (mapped != buffer_mapped(bh))
389 break; 544 return total;
390 total += bh->b_size; 545 total += bh->b_size;
391 } while ((bh = bh->b_this_page) != head); 546 } while ((bh = bh->b_this_page) != head);
392 547
393 /* If we reached the end of the page, sum forwards in 548 /* if we reached the end of the page, sum forwards in following pages */
394 * following pages. 549 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
395 */ 550 tindex = startpage->index + 1;
396 if (bh == head) { 551
397 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; 552 /* Prune this back to avoid pathological behavior */
398 /* Prune this back to avoid pathological behavior */ 553 tloff = min(tlast, startpage->index + 64);
399 tloff = min(tlast, startpage->index + 64); 554
400 for (tindex = startpage->index + 1; tindex < tloff; tindex++) { 555 pagevec_init(&pvec, 0);
401 len = xfs_probe_unmapped_page(mapping, tindex, 556 while (!done && tindex <= tloff) {
402 PAGE_CACHE_SIZE); 557 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
403 if (!len) 558
404 return total; 559 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
560 break;
561
562 for (i = 0; i < pagevec_count(&pvec); i++) {
563 struct page *page = pvec.pages[i];
564 size_t pg_offset, len = 0;
565
566 if (tindex == tlast) {
567 pg_offset =
568 i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
569 if (!pg_offset) {
570 done = 1;
571 break;
572 }
573 } else
574 pg_offset = PAGE_CACHE_SIZE;
575
576 if (page->index == tindex && !TestSetPageLocked(page)) {
577 len = xfs_probe_page(page, pg_offset, mapped);
578 unlock_page(page);
579 }
580
581 if (!len) {
582 done = 1;
583 break;
584 }
585
405 total += len; 586 total += len;
587 tindex++;
406 } 588 }
407 if (tindex == tlast && 589
408 (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { 590 pagevec_release(&pvec);
409 total += xfs_probe_unmapped_page(mapping, 591 cond_resched();
410 tindex, pg_offset);
411 }
412 } 592 }
593
413 return total; 594 return total;
414} 595}
415 596
416/* 597/*
417 * Probe for a given page (index) in the inode and test if it is delayed 598 * Test if a given page is suitable for writing as part of an unwritten
418 * and without unwritten buffers. Returns page locked and with an extra 599 * or delayed allocate extent.
419 * reference count.
420 */ 600 */
421STATIC struct page * 601STATIC int
422xfs_probe_delalloc_page( 602xfs_is_delayed_page(
423 struct inode *inode, 603 struct page *page,
424 pgoff_t index) 604 unsigned int type)
425{ 605{
426 struct page *page;
427
428 page = find_trylock_page(inode->i_mapping, index);
429 if (!page)
430 return NULL;
431 if (PageWriteback(page)) 606 if (PageWriteback(page))
432 goto out; 607 return 0;
433 608
434 if (page->mapping && page_has_buffers(page)) { 609 if (page->mapping && page_has_buffers(page)) {
435 struct buffer_head *bh, *head; 610 struct buffer_head *bh, *head;
@@ -437,243 +612,156 @@ xfs_probe_delalloc_page(
437 612
438 bh = head = page_buffers(page); 613 bh = head = page_buffers(page);
439 do { 614 do {
440 if (buffer_unwritten(bh)) { 615 if (buffer_unwritten(bh))
441 acceptable = 0; 616 acceptable = (type == IOMAP_UNWRITTEN);
617 else if (buffer_delay(bh))
618 acceptable = (type == IOMAP_DELAY);
619 else if (buffer_mapped(bh))
620 acceptable = (type == 0);
621 else
442 break; 622 break;
443 } else if (buffer_delay(bh)) {
444 acceptable = 1;
445 }
446 } while ((bh = bh->b_this_page) != head); 623 } while ((bh = bh->b_this_page) != head);
447 624
448 if (acceptable) 625 if (acceptable)
449 return page; 626 return 1;
450 }
451
452out:
453 unlock_page(page);
454 return NULL;
455}
456
457STATIC int
458xfs_map_unwritten(
459 struct inode *inode,
460 struct page *start_page,
461 struct buffer_head *head,
462 struct buffer_head *curr,
463 unsigned long p_offset,
464 int block_bits,
465 xfs_iomap_t *iomapp,
466 struct writeback_control *wbc,
467 int startio,
468 int all_bh)
469{
470 struct buffer_head *bh = curr;
471 xfs_iomap_t *tmp;
472 xfs_ioend_t *ioend;
473 loff_t offset;
474 unsigned long nblocks = 0;
475
476 offset = start_page->index;
477 offset <<= PAGE_CACHE_SHIFT;
478 offset += p_offset;
479
480 ioend = xfs_alloc_ioend(inode);
481
482 /* First map forwards in the page consecutive buffers
483 * covering this unwritten extent
484 */
485 do {
486 if (!buffer_unwritten(bh))
487 break;
488 tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
489 if (!tmp)
490 break;
491 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
492 set_buffer_unwritten_io(bh);
493 bh->b_private = ioend;
494 p_offset += bh->b_size;
495 nblocks++;
496 } while ((bh = bh->b_this_page) != head);
497
498 atomic_add(nblocks, &ioend->io_remaining);
499
500 /* If we reached the end of the page, map forwards in any
501 * following pages which are also covered by this extent.
502 */
503 if (bh == head) {
504 struct address_space *mapping = inode->i_mapping;
505 pgoff_t tindex, tloff, tlast;
506 unsigned long bs;
507 unsigned int pg_offset, bbits = inode->i_blkbits;
508 struct page *page;
509
510 tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
511 tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
512 tloff = min(tlast, tloff);
513 for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
514 page = xfs_probe_unwritten_page(mapping,
515 tindex, iomapp, ioend,
516 PAGE_CACHE_SIZE, &bs, bbits);
517 if (!page)
518 break;
519 nblocks += bs;
520 atomic_add(bs, &ioend->io_remaining);
521 xfs_convert_page(inode, page, iomapp, wbc, ioend,
522 startio, all_bh);
523 /* stop if converting the next page might add
524 * enough blocks that the corresponding byte
525 * count won't fit in our ulong page buf length */
526 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
527 goto enough;
528 }
529
530 if (tindex == tlast &&
531 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
532 page = xfs_probe_unwritten_page(mapping,
533 tindex, iomapp, ioend,
534 pg_offset, &bs, bbits);
535 if (page) {
536 nblocks += bs;
537 atomic_add(bs, &ioend->io_remaining);
538 xfs_convert_page(inode, page, iomapp, wbc, ioend,
539 startio, all_bh);
540 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
541 goto enough;
542 }
543 }
544 } 627 }
545 628
546enough:
547 ioend->io_size = (xfs_off_t)nblocks << block_bits;
548 ioend->io_offset = offset;
549 xfs_finish_ioend(ioend);
550 return 0; 629 return 0;
551} 630}
552 631
553STATIC void
554xfs_submit_page(
555 struct page *page,
556 struct writeback_control *wbc,
557 struct buffer_head *bh_arr[],
558 int bh_count,
559 int probed_page,
560 int clear_dirty)
561{
562 struct buffer_head *bh;
563 int i;
564
565 BUG_ON(PageWriteback(page));
566 if (bh_count)
567 set_page_writeback(page);
568 if (clear_dirty)
569 clear_page_dirty(page);
570 unlock_page(page);
571
572 if (bh_count) {
573 for (i = 0; i < bh_count; i++) {
574 bh = bh_arr[i];
575 mark_buffer_async_write(bh);
576 if (buffer_unwritten(bh))
577 set_buffer_unwritten_io(bh);
578 set_buffer_uptodate(bh);
579 clear_buffer_dirty(bh);
580 }
581
582 for (i = 0; i < bh_count; i++)
583 submit_bh(WRITE, bh_arr[i]);
584
585 if (probed_page && clear_dirty)
586 wbc->nr_to_write--; /* Wrote an "extra" page */
587 }
588}
589
590/* 632/*
591 * Allocate & map buffers for page given the extent map. Write it out. 633 * Allocate & map buffers for page given the extent map. Write it out.
592 * except for the original page of a writepage, this is called on 634 * except for the original page of a writepage, this is called on
593 * delalloc/unwritten pages only, for the original page it is possible 635 * delalloc/unwritten pages only, for the original page it is possible
594 * that the page has no mapping at all. 636 * that the page has no mapping at all.
595 */ 637 */
596STATIC void 638STATIC int
597xfs_convert_page( 639xfs_convert_page(
598 struct inode *inode, 640 struct inode *inode,
599 struct page *page, 641 struct page *page,
600 xfs_iomap_t *iomapp, 642 loff_t tindex,
643 xfs_iomap_t *mp,
644 xfs_ioend_t **ioendp,
601 struct writeback_control *wbc, 645 struct writeback_control *wbc,
602 void *private,
603 int startio, 646 int startio,
604 int all_bh) 647 int all_bh)
605{ 648{
606 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; 649 struct buffer_head *bh, *head;
607 xfs_iomap_t *mp = iomapp, *tmp; 650 xfs_off_t end_offset;
608 unsigned long offset, end_offset; 651 unsigned long p_offset;
609 int index = 0; 652 unsigned int type;
610 int bbits = inode->i_blkbits; 653 int bbits = inode->i_blkbits;
611 int len, page_dirty; 654 int len, page_dirty;
655 int count = 0, done = 0, uptodate = 1;
656 xfs_off_t offset = page_offset(page);
612 657
613 end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)); 658 if (page->index != tindex)
659 goto fail;
660 if (TestSetPageLocked(page))
661 goto fail;
662 if (PageWriteback(page))
663 goto fail_unlock_page;
664 if (page->mapping != inode->i_mapping)
665 goto fail_unlock_page;
666 if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
667 goto fail_unlock_page;
614 668
615 /* 669 /*
616 * page_dirty is initially a count of buffers on the page before 670 * page_dirty is initially a count of buffers on the page before
617 * EOF and is decremented as we move each into a cleanable state. 671 * EOF and is decremented as we move each into a cleanable state.
672 *
673 * Derivation:
674 *
675 * End offset is the highest offset that this page should represent.
676 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
677 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
678 * hence give us the correct page_dirty count. On any other page,
679 * it will be zero and in that case we need page_dirty to be the
680 * count of buffers on the page.
618 */ 681 */
682 end_offset = min_t(unsigned long long,
683 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
684 i_size_read(inode));
685
619 len = 1 << inode->i_blkbits; 686 len = 1 << inode->i_blkbits;
620 end_offset = max(end_offset, PAGE_CACHE_SIZE); 687 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
621 end_offset = roundup(end_offset, len); 688 PAGE_CACHE_SIZE);
622 page_dirty = end_offset / len; 689 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
690 page_dirty = p_offset / len;
623 691
624 offset = 0;
625 bh = head = page_buffers(page); 692 bh = head = page_buffers(page);
626 do { 693 do {
627 if (offset >= end_offset) 694 if (offset >= end_offset)
628 break; 695 break;
629 if (!(PageUptodate(page) || buffer_uptodate(bh))) 696 if (!buffer_uptodate(bh))
697 uptodate = 0;
698 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
699 done = 1;
630 continue; 700 continue;
631 if (buffer_mapped(bh) && all_bh && 701 }
632 !(buffer_unwritten(bh) || buffer_delay(bh))) { 702
703 if (buffer_unwritten(bh) || buffer_delay(bh)) {
704 if (buffer_unwritten(bh))
705 type = IOMAP_UNWRITTEN;
706 else
707 type = IOMAP_DELAY;
708
709 if (!xfs_iomap_valid(mp, offset)) {
710 done = 1;
711 continue;
712 }
713
714 ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
715 ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
716
717 xfs_map_at_offset(bh, offset, bbits, mp);
633 if (startio) { 718 if (startio) {
719 xfs_add_to_ioend(inode, bh, offset,
720 type, ioendp, done);
721 } else {
722 set_buffer_dirty(bh);
723 unlock_buffer(bh);
724 mark_buffer_dirty(bh);
725 }
726 page_dirty--;
727 count++;
728 } else {
729 type = 0;
730 if (buffer_mapped(bh) && all_bh && startio) {
634 lock_buffer(bh); 731 lock_buffer(bh);
635 bh_arr[index++] = bh; 732 xfs_add_to_ioend(inode, bh, offset,
733 type, ioendp, done);
734 count++;
636 page_dirty--; 735 page_dirty--;
736 } else {
737 done = 1;
637 } 738 }
638 continue;
639 } 739 }
640 tmp = xfs_offset_to_map(page, mp, offset); 740 } while (offset += len, (bh = bh->b_this_page) != head);
641 if (!tmp)
642 continue;
643 ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
644 ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
645 741
646 /* If this is a new unwritten extent buffer (i.e. one 742 if (uptodate && bh == head)
647 * that we haven't passed in private data for, we must 743 SetPageUptodate(page);
648 * now map this buffer too. 744
649 */ 745 if (startio) {
650 if (buffer_unwritten(bh) && !bh->b_end_io) { 746 if (count) {
651 ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); 747 struct backing_dev_info *bdi;
652 xfs_map_unwritten(inode, page, head, bh, offset, 748
653 bbits, tmp, wbc, startio, all_bh); 749 bdi = inode->i_mapping->backing_dev_info;
654 } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { 750 if (bdi_write_congested(bdi)) {
655 xfs_map_at_offset(page, bh, offset, bbits, tmp); 751 wbc->encountered_congestion = 1;
656 if (buffer_unwritten(bh)) { 752 done = 1;
657 set_buffer_unwritten_io(bh); 753 } else if (--wbc->nr_to_write <= 0) {
658 bh->b_private = private; 754 done = 1;
659 ASSERT(private);
660 } 755 }
661 } 756 }
662 if (startio) { 757 xfs_start_page_writeback(page, wbc, !page_dirty, count);
663 bh_arr[index++] = bh;
664 } else {
665 set_buffer_dirty(bh);
666 unlock_buffer(bh);
667 mark_buffer_dirty(bh);
668 }
669 page_dirty--;
670 } while (offset += len, (bh = bh->b_this_page) != head);
671
672 if (startio && index) {
673 xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
674 } else {
675 unlock_page(page);
676 } 758 }
759
760 return done;
761 fail_unlock_page:
762 unlock_page(page);
763 fail:
764 return 1;
677} 765}
678 766
679/* 767/*
@@ -685,19 +773,31 @@ xfs_cluster_write(
685 struct inode *inode, 773 struct inode *inode,
686 pgoff_t tindex, 774 pgoff_t tindex,
687 xfs_iomap_t *iomapp, 775 xfs_iomap_t *iomapp,
776 xfs_ioend_t **ioendp,
688 struct writeback_control *wbc, 777 struct writeback_control *wbc,
689 int startio, 778 int startio,
690 int all_bh, 779 int all_bh,
691 pgoff_t tlast) 780 pgoff_t tlast)
692{ 781{
693 struct page *page; 782 struct pagevec pvec;
783 int done = 0, i;
784
785 pagevec_init(&pvec, 0);
786 while (!done && tindex <= tlast) {
787 unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
694 788
695 for (; tindex <= tlast; tindex++) { 789 if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
696 page = xfs_probe_delalloc_page(inode, tindex);
697 if (!page)
698 break; 790 break;
699 xfs_convert_page(inode, page, iomapp, wbc, NULL, 791
700 startio, all_bh); 792 for (i = 0; i < pagevec_count(&pvec); i++) {
793 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
794 iomapp, ioendp, wbc, startio, all_bh);
795 if (done)
796 break;
797 }
798
799 pagevec_release(&pvec);
800 cond_resched();
701 } 801 }
702} 802}
703 803
@@ -728,18 +828,22 @@ xfs_page_state_convert(
728 int startio, 828 int startio,
729 int unmapped) /* also implies page uptodate */ 829 int unmapped) /* also implies page uptodate */
730{ 830{
731 struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; 831 struct buffer_head *bh, *head;
732 xfs_iomap_t *iomp, iomap; 832 xfs_iomap_t iomap;
833 xfs_ioend_t *ioend = NULL, *iohead = NULL;
733 loff_t offset; 834 loff_t offset;
734 unsigned long p_offset = 0; 835 unsigned long p_offset = 0;
836 unsigned int type;
735 __uint64_t end_offset; 837 __uint64_t end_offset;
736 pgoff_t end_index, last_index, tlast; 838 pgoff_t end_index, last_index, tlast;
737 int len, err, i, cnt = 0, uptodate = 1; 839 ssize_t size, len;
738 int flags; 840 int flags, err, iomap_valid = 0, uptodate = 1;
739 int page_dirty; 841 int page_dirty, count = 0, trylock_flag = 0;
842 int all_bh = unmapped;
740 843
741 /* wait for other IO threads? */ 844 /* wait for other IO threads? */
742 flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK; 845 if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
846 trylock_flag |= BMAPI_TRYLOCK;
743 847
744 /* Is this page beyond the end of the file? */ 848 /* Is this page beyond the end of the file? */
745 offset = i_size_read(inode); 849 offset = i_size_read(inode);
@@ -754,161 +858,173 @@ xfs_page_state_convert(
754 } 858 }
755 } 859 }
756 860
757 end_offset = min_t(unsigned long long,
758 (loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
759 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
760
761 /* 861 /*
762 * page_dirty is initially a count of buffers on the page before 862 * page_dirty is initially a count of buffers on the page before
763 * EOF and is decrememted as we move each into a cleanable state. 863 * EOF and is decrememted as we move each into a cleanable state.
764 */ 864 *
865 * Derivation:
866 *
867 * End offset is the highest offset that this page should represent.
868 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
869 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
870 * hence give us the correct page_dirty count. On any other page,
871 * it will be zero and in that case we need page_dirty to be the
872 * count of buffers on the page.
873 */
874 end_offset = min_t(unsigned long long,
875 (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
765 len = 1 << inode->i_blkbits; 876 len = 1 << inode->i_blkbits;
766 p_offset = max(p_offset, PAGE_CACHE_SIZE); 877 p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
767 p_offset = roundup(p_offset, len); 878 PAGE_CACHE_SIZE);
879 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
768 page_dirty = p_offset / len; 880 page_dirty = p_offset / len;
769 881
770 iomp = NULL;
771 p_offset = 0;
772 bh = head = page_buffers(page); 882 bh = head = page_buffers(page);
883 offset = page_offset(page);
884 flags = -1;
885 type = 0;
886
887 /* TODO: cleanup count and page_dirty */
773 888
774 do { 889 do {
775 if (offset >= end_offset) 890 if (offset >= end_offset)
776 break; 891 break;
777 if (!buffer_uptodate(bh)) 892 if (!buffer_uptodate(bh))
778 uptodate = 0; 893 uptodate = 0;
779 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) 894 if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
895 /*
896 * the iomap is actually still valid, but the ioend
897 * isn't. shouldn't happen too often.
898 */
899 iomap_valid = 0;
780 continue; 900 continue;
781
782 if (iomp) {
783 iomp = xfs_offset_to_map(page, &iomap, p_offset);
784 } 901 }
785 902
903 if (iomap_valid)
904 iomap_valid = xfs_iomap_valid(&iomap, offset);
905
786 /* 906 /*
787 * First case, map an unwritten extent and prepare for 907 * First case, map an unwritten extent and prepare for
788 * extent state conversion transaction on completion. 908 * extent state conversion transaction on completion.
789 */ 909 *
790 if (buffer_unwritten(bh)) { 910 * Second case, allocate space for a delalloc buffer.
791 if (!startio) 911 * We can return EAGAIN here in the release page case.
792 continue; 912 *
793 if (!iomp) { 913 * Third case, an unmapped buffer was found, and we are
794 err = xfs_map_blocks(inode, offset, len, &iomap, 914 * in a path where we need to write the whole page out.
795 BMAPI_WRITE|BMAPI_IGNSTATE); 915 */
796 if (err) { 916 if (buffer_unwritten(bh) || buffer_delay(bh) ||
797 goto error; 917 ((buffer_uptodate(bh) || PageUptodate(page)) &&
798 } 918 !buffer_mapped(bh) && (unmapped || startio))) {
799 iomp = xfs_offset_to_map(page, &iomap, 919 /*
800 p_offset); 920 * Make sure we don't use a read-only iomap
921 */
922 if (flags == BMAPI_READ)
923 iomap_valid = 0;
924
925 if (buffer_unwritten(bh)) {
926 type = IOMAP_UNWRITTEN;
927 flags = BMAPI_WRITE|BMAPI_IGNSTATE;
928 } else if (buffer_delay(bh)) {
929 type = IOMAP_DELAY;
930 flags = BMAPI_ALLOCATE;
931 if (!startio)
932 flags |= trylock_flag;
933 } else {
934 type = IOMAP_NEW;
935 flags = BMAPI_WRITE|BMAPI_MMAP;
801 } 936 }
802 if (iomp) { 937
803 if (!bh->b_end_io) { 938 if (!iomap_valid) {
804 err = xfs_map_unwritten(inode, page, 939 if (type == IOMAP_NEW) {
805 head, bh, p_offset, 940 size = xfs_probe_cluster(inode,
806 inode->i_blkbits, iomp, 941 page, bh, head, 0);
807 wbc, startio, unmapped);
808 if (err) {
809 goto error;
810 }
811 } else { 942 } else {
812 set_bit(BH_Lock, &bh->b_state); 943 size = len;
813 } 944 }
814 BUG_ON(!buffer_locked(bh)); 945
815 bh_arr[cnt++] = bh; 946 err = xfs_map_blocks(inode, offset, size,
816 page_dirty--; 947 &iomap, flags);
817 } 948 if (err)
818 /*
819 * Second case, allocate space for a delalloc buffer.
820 * We can return EAGAIN here in the release page case.
821 */
822 } else if (buffer_delay(bh)) {
823 if (!iomp) {
824 err = xfs_map_blocks(inode, offset, len, &iomap,
825 BMAPI_ALLOCATE | flags);
826 if (err) {
827 goto error; 949 goto error;
828 } 950 iomap_valid = xfs_iomap_valid(&iomap, offset);
829 iomp = xfs_offset_to_map(page, &iomap,
830 p_offset);
831 } 951 }
832 if (iomp) { 952 if (iomap_valid) {
833 xfs_map_at_offset(page, bh, p_offset, 953 xfs_map_at_offset(bh, offset,
834 inode->i_blkbits, iomp); 954 inode->i_blkbits, &iomap);
835 if (startio) { 955 if (startio) {
836 bh_arr[cnt++] = bh; 956 xfs_add_to_ioend(inode, bh, offset,
957 type, &ioend,
958 !iomap_valid);
837 } else { 959 } else {
838 set_buffer_dirty(bh); 960 set_buffer_dirty(bh);
839 unlock_buffer(bh); 961 unlock_buffer(bh);
840 mark_buffer_dirty(bh); 962 mark_buffer_dirty(bh);
841 } 963 }
842 page_dirty--; 964 page_dirty--;
965 count++;
966 }
967 } else if (buffer_uptodate(bh) && startio) {
968 /*
969 * we got here because the buffer is already mapped.
970 * That means it must already have extents allocated
971 * underneath it. Map the extent by reading it.
972 */
973 if (!iomap_valid || type != 0) {
974 flags = BMAPI_READ;
975 size = xfs_probe_cluster(inode, page, bh,
976 head, 1);
977 err = xfs_map_blocks(inode, offset, size,
978 &iomap, flags);
979 if (err)
980 goto error;
981 iomap_valid = xfs_iomap_valid(&iomap, offset);
843 } 982 }
844 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
845 (unmapped || startio)) {
846 983
847 if (!buffer_mapped(bh)) { 984 type = 0;
848 int size; 985 if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
849 986 ASSERT(buffer_mapped(bh));
850 /* 987 if (iomap_valid)
851 * Getting here implies an unmapped buffer 988 all_bh = 1;
852 * was found, and we are in a path where we 989 xfs_add_to_ioend(inode, bh, offset, type,
853 * need to write the whole page out. 990 &ioend, !iomap_valid);
854 */ 991 page_dirty--;
855 if (!iomp) { 992 count++;
856 size = xfs_probe_unmapped_cluster( 993 } else {
857 inode, page, bh, head); 994 iomap_valid = 0;
858 err = xfs_map_blocks(inode, offset,
859 size, &iomap,
860 BMAPI_WRITE|BMAPI_MMAP);
861 if (err) {
862 goto error;
863 }
864 iomp = xfs_offset_to_map(page, &iomap,
865 p_offset);
866 }
867 if (iomp) {
868 xfs_map_at_offset(page,
869 bh, p_offset,
870 inode->i_blkbits, iomp);
871 if (startio) {
872 bh_arr[cnt++] = bh;
873 } else {
874 set_buffer_dirty(bh);
875 unlock_buffer(bh);
876 mark_buffer_dirty(bh);
877 }
878 page_dirty--;
879 }
880 } else if (startio) {
881 if (buffer_uptodate(bh) &&
882 !test_and_set_bit(BH_Lock, &bh->b_state)) {
883 bh_arr[cnt++] = bh;
884 page_dirty--;
885 }
886 } 995 }
996 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
997 (unmapped || startio)) {
998 iomap_valid = 0;
887 } 999 }
888 } while (offset += len, p_offset += len, 1000
889 ((bh = bh->b_this_page) != head)); 1001 if (!iohead)
1002 iohead = ioend;
1003
1004 } while (offset += len, ((bh = bh->b_this_page) != head));
890 1005
891 if (uptodate && bh == head) 1006 if (uptodate && bh == head)
892 SetPageUptodate(page); 1007 SetPageUptodate(page);
893 1008
894 if (startio) { 1009 if (startio)
895 xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty); 1010 xfs_start_page_writeback(page, wbc, 1, count);
896 }
897 1011
898 if (iomp) { 1012 if (ioend && iomap_valid) {
899 offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >> 1013 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
900 PAGE_CACHE_SHIFT; 1014 PAGE_CACHE_SHIFT;
901 tlast = min_t(pgoff_t, offset, last_index); 1015 tlast = min_t(pgoff_t, offset, last_index);
902 xfs_cluster_write(inode, page->index + 1, iomp, wbc, 1016 xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
903 startio, unmapped, tlast); 1017 wbc, startio, all_bh, tlast);
904 } 1018 }
905 1019
1020 if (iohead)
1021 xfs_submit_ioend(iohead);
1022
906 return page_dirty; 1023 return page_dirty;
907 1024
908error: 1025error:
909 for (i = 0; i < cnt; i++) { 1026 if (iohead)
910 unlock_buffer(bh_arr[i]); 1027 xfs_cancel_ioend(iohead);
911 }
912 1028
913 /* 1029 /*
914 * If it's delalloc and we have nowhere to put it, 1030 * If it's delalloc and we have nowhere to put it,
@@ -916,9 +1032,8 @@ error:
916 * us to try again. 1032 * us to try again.
917 */ 1033 */
918 if (err != -EAGAIN) { 1034 if (err != -EAGAIN) {
919 if (!unmapped) { 1035 if (!unmapped)
920 block_invalidatepage(page, 0); 1036 block_invalidatepage(page, 0);
921 }
922 ClearPageUptodate(page); 1037 ClearPageUptodate(page);
923 } 1038 }
924 return err; 1039 return err;
@@ -982,7 +1097,7 @@ __linvfs_get_block(
982 } 1097 }
983 1098
984 /* If this is a realtime file, data might be on a new device */ 1099 /* If this is a realtime file, data might be on a new device */
985 bh_result->b_bdev = iomap.iomap_target->pbr_bdev; 1100 bh_result->b_bdev = iomap.iomap_target->bt_bdev;
986 1101
987 /* If we previously allocated a block out beyond eof and 1102 /* If we previously allocated a block out beyond eof and
988 * we are now coming back to use it then we will need to 1103 * we are now coming back to use it then we will need to
@@ -1094,10 +1209,10 @@ linvfs_direct_IO(
1094 if (error) 1209 if (error)
1095 return -error; 1210 return -error;
1096 1211
1097 iocb->private = xfs_alloc_ioend(inode); 1212 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
1098 1213
1099 ret = blockdev_direct_IO_own_locking(rw, iocb, inode, 1214 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
1100 iomap.iomap_target->pbr_bdev, 1215 iomap.iomap_target->bt_bdev,
1101 iov, offset, nr_segs, 1216 iov, offset, nr_segs,
1102 linvfs_get_blocks_direct, 1217 linvfs_get_blocks_direct,
1103 linvfs_end_io_direct); 1218 linvfs_end_io_direct);
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4720758a9ade..55339dd5a30d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool;
23 23
24typedef void (*xfs_ioend_func_t)(void *); 24typedef void (*xfs_ioend_func_t)(void *);
25 25
26/*
27 * xfs_ioend struct manages large extent writes for XFS.
28 * It can manage several multi-page bio's at once.
29 */
26typedef struct xfs_ioend { 30typedef struct xfs_ioend {
31 struct xfs_ioend *io_list; /* next ioend in chain */
32 unsigned int io_type; /* delalloc / unwritten */
27 unsigned int io_uptodate; /* I/O status register */ 33 unsigned int io_uptodate; /* I/O status register */
28 atomic_t io_remaining; /* hold count */ 34 atomic_t io_remaining; /* hold count */
29 struct vnode *io_vnode; /* file being written to */ 35 struct vnode *io_vnode; /* file being written to */
30 struct buffer_head *io_buffer_head;/* buffer linked list head */ 36 struct buffer_head *io_buffer_head;/* buffer linked list head */
37 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
31 size_t io_size; /* size of the extent */ 38 size_t io_size; /* size of the extent */
32 xfs_off_t io_offset; /* offset in the file */ 39 xfs_off_t io_offset; /* offset in the file */
33 struct work_struct io_work; /* xfsdatad work queue */ 40 struct work_struct io_work; /* xfsdatad work queue */
34} xfs_ioend_t; 41} xfs_ioend_t;
35 42
43extern struct address_space_operations linvfs_aops;
44extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
45
36#endif /* __XFS_IOPS_H__ */ 46#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b8847..e44b7c1a3a36 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -31,76 +31,77 @@
31#include <linux/kthread.h> 31#include <linux/kthread.h>
32#include "xfs_linux.h" 32#include "xfs_linux.h"
33 33
34STATIC kmem_cache_t *pagebuf_zone; 34STATIC kmem_zone_t *xfs_buf_zone;
35STATIC kmem_shaker_t pagebuf_shake; 35STATIC kmem_shaker_t xfs_buf_shake;
36STATIC int xfsbufd(void *);
36STATIC int xfsbufd_wakeup(int, gfp_t); 37STATIC int xfsbufd_wakeup(int, gfp_t);
37STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); 38STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
38 39
39STATIC struct workqueue_struct *xfslogd_workqueue; 40STATIC struct workqueue_struct *xfslogd_workqueue;
40struct workqueue_struct *xfsdatad_workqueue; 41struct workqueue_struct *xfsdatad_workqueue;
41 42
42#ifdef PAGEBUF_TRACE 43#ifdef XFS_BUF_TRACE
43void 44void
44pagebuf_trace( 45xfs_buf_trace(
45 xfs_buf_t *pb, 46 xfs_buf_t *bp,
46 char *id, 47 char *id,
47 void *data, 48 void *data,
48 void *ra) 49 void *ra)
49{ 50{
50 ktrace_enter(pagebuf_trace_buf, 51 ktrace_enter(xfs_buf_trace_buf,
51 pb, id, 52 bp, id,
52 (void *)(unsigned long)pb->pb_flags, 53 (void *)(unsigned long)bp->b_flags,
53 (void *)(unsigned long)pb->pb_hold.counter, 54 (void *)(unsigned long)bp->b_hold.counter,
54 (void *)(unsigned long)pb->pb_sema.count.counter, 55 (void *)(unsigned long)bp->b_sema.count.counter,
55 (void *)current, 56 (void *)current,
56 data, ra, 57 data, ra,
57 (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), 58 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
58 (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), 59 (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
59 (void *)(unsigned long)pb->pb_buffer_length, 60 (void *)(unsigned long)bp->b_buffer_length,
60 NULL, NULL, NULL, NULL, NULL); 61 NULL, NULL, NULL, NULL, NULL);
61} 62}
62ktrace_t *pagebuf_trace_buf; 63ktrace_t *xfs_buf_trace_buf;
63#define PAGEBUF_TRACE_SIZE 4096 64#define XFS_BUF_TRACE_SIZE 4096
64#define PB_TRACE(pb, id, data) \ 65#define XB_TRACE(bp, id, data) \
65 pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) 66 xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
66#else 67#else
67#define PB_TRACE(pb, id, data) do { } while (0) 68#define XB_TRACE(bp, id, data) do { } while (0)
68#endif 69#endif
69 70
70#ifdef PAGEBUF_LOCK_TRACKING 71#ifdef XFS_BUF_LOCK_TRACKING
71# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) 72# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
72# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) 73# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
73# define PB_GET_OWNER(pb) ((pb)->pb_last_holder) 74# define XB_GET_OWNER(bp) ((bp)->b_last_holder)
74#else 75#else
75# define PB_SET_OWNER(pb) do { } while (0) 76# define XB_SET_OWNER(bp) do { } while (0)
76# define PB_CLEAR_OWNER(pb) do { } while (0) 77# define XB_CLEAR_OWNER(bp) do { } while (0)
77# define PB_GET_OWNER(pb) do { } while (0) 78# define XB_GET_OWNER(bp) do { } while (0)
78#endif 79#endif
79 80
80#define pb_to_gfp(flags) \ 81#define xb_to_gfp(flags) \
81 ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \ 82 ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
82 ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) 83 ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
83 84
84#define pb_to_km(flags) \ 85#define xb_to_km(flags) \
85 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) 86 (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
86 87
87#define pagebuf_allocate(flags) \ 88#define xfs_buf_allocate(flags) \
88 kmem_zone_alloc(pagebuf_zone, pb_to_km(flags)) 89 kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
89#define pagebuf_deallocate(pb) \ 90#define xfs_buf_deallocate(bp) \
90 kmem_zone_free(pagebuf_zone, (pb)); 91 kmem_zone_free(xfs_buf_zone, (bp));
91 92
92/* 93/*
93 * Page Region interfaces. 94 * Page Region interfaces.
94 * 95 *
95 * For pages in filesystems where the blocksize is smaller than the 96 * For pages in filesystems where the blocksize is smaller than the
96 * pagesize, we use the page->private field (long) to hold a bitmap 97 * pagesize, we use the page->private field (long) to hold a bitmap
97 * of uptodate regions within the page. 98 * of uptodate regions within the page.
98 * 99 *
99 * Each such region is "bytes per page / bits per long" bytes long. 100 * Each such region is "bytes per page / bits per long" bytes long.
100 * 101 *
101 * NBPPR == number-of-bytes-per-page-region 102 * NBPPR == number-of-bytes-per-page-region
102 * BTOPR == bytes-to-page-region (rounded up) 103 * BTOPR == bytes-to-page-region (rounded up)
103 * BTOPRT == bytes-to-page-region-truncated (rounded down) 104 * BTOPRT == bytes-to-page-region-truncated (rounded down)
104 */ 105 */
105#if (BITS_PER_LONG == 32) 106#if (BITS_PER_LONG == 32)
106#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ 107#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
@@ -159,7 +160,7 @@ test_page_region(
159} 160}
160 161
161/* 162/*
162 * Mapping of multi-page buffers into contiguous virtual space 163 * Mapping of multi-page buffers into contiguous virtual space
163 */ 164 */
164 165
165typedef struct a_list { 166typedef struct a_list {
@@ -172,7 +173,7 @@ STATIC int as_list_len;
172STATIC DEFINE_SPINLOCK(as_lock); 173STATIC DEFINE_SPINLOCK(as_lock);
173 174
174/* 175/*
175 * Try to batch vunmaps because they are costly. 176 * Try to batch vunmaps because they are costly.
176 */ 177 */
177STATIC void 178STATIC void
178free_address( 179free_address(
@@ -215,83 +216,83 @@ purge_addresses(void)
215} 216}
216 217
217/* 218/*
218 * Internal pagebuf object manipulation 219 * Internal xfs_buf_t object manipulation
219 */ 220 */
220 221
221STATIC void 222STATIC void
222_pagebuf_initialize( 223_xfs_buf_initialize(
223 xfs_buf_t *pb, 224 xfs_buf_t *bp,
224 xfs_buftarg_t *target, 225 xfs_buftarg_t *target,
225 loff_t range_base, 226 xfs_off_t range_base,
226 size_t range_length, 227 size_t range_length,
227 page_buf_flags_t flags) 228 xfs_buf_flags_t flags)
228{ 229{
229 /* 230 /*
230 * We don't want certain flags to appear in pb->pb_flags. 231 * We don't want certain flags to appear in b_flags.
231 */ 232 */
232 flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); 233 flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
233 234
234 memset(pb, 0, sizeof(xfs_buf_t)); 235 memset(bp, 0, sizeof(xfs_buf_t));
235 atomic_set(&pb->pb_hold, 1); 236 atomic_set(&bp->b_hold, 1);
236 init_MUTEX_LOCKED(&pb->pb_iodonesema); 237 init_MUTEX_LOCKED(&bp->b_iodonesema);
237 INIT_LIST_HEAD(&pb->pb_list); 238 INIT_LIST_HEAD(&bp->b_list);
238 INIT_LIST_HEAD(&pb->pb_hash_list); 239 INIT_LIST_HEAD(&bp->b_hash_list);
239 init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ 240 init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
240 PB_SET_OWNER(pb); 241 XB_SET_OWNER(bp);
241 pb->pb_target = target; 242 bp->b_target = target;
242 pb->pb_file_offset = range_base; 243 bp->b_file_offset = range_base;
243 /* 244 /*
244 * Set buffer_length and count_desired to the same value initially. 245 * Set buffer_length and count_desired to the same value initially.
245 * I/O routines should use count_desired, which will be the same in 246 * I/O routines should use count_desired, which will be the same in
246 * most cases but may be reset (e.g. XFS recovery). 247 * most cases but may be reset (e.g. XFS recovery).
247 */ 248 */
248 pb->pb_buffer_length = pb->pb_count_desired = range_length; 249 bp->b_buffer_length = bp->b_count_desired = range_length;
249 pb->pb_flags = flags; 250 bp->b_flags = flags;
250 pb->pb_bn = XFS_BUF_DADDR_NULL; 251 bp->b_bn = XFS_BUF_DADDR_NULL;
251 atomic_set(&pb->pb_pin_count, 0); 252 atomic_set(&bp->b_pin_count, 0);
252 init_waitqueue_head(&pb->pb_waiters); 253 init_waitqueue_head(&bp->b_waiters);
253 254
254 XFS_STATS_INC(pb_create); 255 XFS_STATS_INC(xb_create);
255 PB_TRACE(pb, "initialize", target); 256 XB_TRACE(bp, "initialize", target);
256} 257}
257 258
258/* 259/*
259 * Allocate a page array capable of holding a specified number 260 * Allocate a page array capable of holding a specified number
260 * of pages, and point the page buf at it. 261 * of pages, and point the page buf at it.
261 */ 262 */
262STATIC int 263STATIC int
263_pagebuf_get_pages( 264_xfs_buf_get_pages(
264 xfs_buf_t *pb, 265 xfs_buf_t *bp,
265 int page_count, 266 int page_count,
266 page_buf_flags_t flags) 267 xfs_buf_flags_t flags)
267{ 268{
268 /* Make sure that we have a page list */ 269 /* Make sure that we have a page list */
269 if (pb->pb_pages == NULL) { 270 if (bp->b_pages == NULL) {
270 pb->pb_offset = page_buf_poff(pb->pb_file_offset); 271 bp->b_offset = xfs_buf_poff(bp->b_file_offset);
271 pb->pb_page_count = page_count; 272 bp->b_page_count = page_count;
272 if (page_count <= PB_PAGES) { 273 if (page_count <= XB_PAGES) {
273 pb->pb_pages = pb->pb_page_array; 274 bp->b_pages = bp->b_page_array;
274 } else { 275 } else {
275 pb->pb_pages = kmem_alloc(sizeof(struct page *) * 276 bp->b_pages = kmem_alloc(sizeof(struct page *) *
276 page_count, pb_to_km(flags)); 277 page_count, xb_to_km(flags));
277 if (pb->pb_pages == NULL) 278 if (bp->b_pages == NULL)
278 return -ENOMEM; 279 return -ENOMEM;
279 } 280 }
280 memset(pb->pb_pages, 0, sizeof(struct page *) * page_count); 281 memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
281 } 282 }
282 return 0; 283 return 0;
283} 284}
284 285
285/* 286/*
286 * Frees pb_pages if it was malloced. 287 * Frees b_pages if it was allocated.
287 */ 288 */
288STATIC void 289STATIC void
289_pagebuf_free_pages( 290_xfs_buf_free_pages(
290 xfs_buf_t *bp) 291 xfs_buf_t *bp)
291{ 292{
292 if (bp->pb_pages != bp->pb_page_array) { 293 if (bp->b_pages != bp->b_page_array) {
293 kmem_free(bp->pb_pages, 294 kmem_free(bp->b_pages,
294 bp->pb_page_count * sizeof(struct page *)); 295 bp->b_page_count * sizeof(struct page *));
295 } 296 }
296} 297}
297 298
@@ -299,79 +300,79 @@ _pagebuf_free_pages(
299 * Releases the specified buffer. 300 * Releases the specified buffer.
300 * 301 *
301 * The modification state of any associated pages is left unchanged. 302 * The modification state of any associated pages is left unchanged.
302 * The buffer most not be on any hash - use pagebuf_rele instead for 303 * The buffer most not be on any hash - use xfs_buf_rele instead for
303 * hashed and refcounted buffers 304 * hashed and refcounted buffers
304 */ 305 */
305void 306void
306pagebuf_free( 307xfs_buf_free(
307 xfs_buf_t *bp) 308 xfs_buf_t *bp)
308{ 309{
309 PB_TRACE(bp, "free", 0); 310 XB_TRACE(bp, "free", 0);
310 311
311 ASSERT(list_empty(&bp->pb_hash_list)); 312 ASSERT(list_empty(&bp->b_hash_list));
312 313
313 if (bp->pb_flags & _PBF_PAGE_CACHE) { 314 if (bp->b_flags & _XBF_PAGE_CACHE) {
314 uint i; 315 uint i;
315 316
316 if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1)) 317 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
317 free_address(bp->pb_addr - bp->pb_offset); 318 free_address(bp->b_addr - bp->b_offset);
318 319
319 for (i = 0; i < bp->pb_page_count; i++) 320 for (i = 0; i < bp->b_page_count; i++)
320 page_cache_release(bp->pb_pages[i]); 321 page_cache_release(bp->b_pages[i]);
321 _pagebuf_free_pages(bp); 322 _xfs_buf_free_pages(bp);
322 } else if (bp->pb_flags & _PBF_KMEM_ALLOC) { 323 } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
323 /* 324 /*
324 * XXX(hch): bp->pb_count_desired might be incorrect (see 325 * XXX(hch): bp->b_count_desired might be incorrect (see
325 * pagebuf_associate_memory for details), but fortunately 326 * xfs_buf_associate_memory for details), but fortunately
326 * the Linux version of kmem_free ignores the len argument.. 327 * the Linux version of kmem_free ignores the len argument..
327 */ 328 */
328 kmem_free(bp->pb_addr, bp->pb_count_desired); 329 kmem_free(bp->b_addr, bp->b_count_desired);
329 _pagebuf_free_pages(bp); 330 _xfs_buf_free_pages(bp);
330 } 331 }
331 332
332 pagebuf_deallocate(bp); 333 xfs_buf_deallocate(bp);
333} 334}
334 335
335/* 336/*
336 * Finds all pages for buffer in question and builds it's page list. 337 * Finds all pages for buffer in question and builds it's page list.
337 */ 338 */
338STATIC int 339STATIC int
339_pagebuf_lookup_pages( 340_xfs_buf_lookup_pages(
340 xfs_buf_t *bp, 341 xfs_buf_t *bp,
341 uint flags) 342 uint flags)
342{ 343{
343 struct address_space *mapping = bp->pb_target->pbr_mapping; 344 struct address_space *mapping = bp->b_target->bt_mapping;
344 size_t blocksize = bp->pb_target->pbr_bsize; 345 size_t blocksize = bp->b_target->bt_bsize;
345 size_t size = bp->pb_count_desired; 346 size_t size = bp->b_count_desired;
346 size_t nbytes, offset; 347 size_t nbytes, offset;
347 gfp_t gfp_mask = pb_to_gfp(flags); 348 gfp_t gfp_mask = xb_to_gfp(flags);
348 unsigned short page_count, i; 349 unsigned short page_count, i;
349 pgoff_t first; 350 pgoff_t first;
350 loff_t end; 351 xfs_off_t end;
351 int error; 352 int error;
352 353
353 end = bp->pb_file_offset + bp->pb_buffer_length; 354 end = bp->b_file_offset + bp->b_buffer_length;
354 page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset); 355 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
355 356
356 error = _pagebuf_get_pages(bp, page_count, flags); 357 error = _xfs_buf_get_pages(bp, page_count, flags);
357 if (unlikely(error)) 358 if (unlikely(error))
358 return error; 359 return error;
359 bp->pb_flags |= _PBF_PAGE_CACHE; 360 bp->b_flags |= _XBF_PAGE_CACHE;
360 361
361 offset = bp->pb_offset; 362 offset = bp->b_offset;
362 first = bp->pb_file_offset >> PAGE_CACHE_SHIFT; 363 first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
363 364
364 for (i = 0; i < bp->pb_page_count; i++) { 365 for (i = 0; i < bp->b_page_count; i++) {
365 struct page *page; 366 struct page *page;
366 uint retries = 0; 367 uint retries = 0;
367 368
368 retry: 369 retry:
369 page = find_or_create_page(mapping, first + i, gfp_mask); 370 page = find_or_create_page(mapping, first + i, gfp_mask);
370 if (unlikely(page == NULL)) { 371 if (unlikely(page == NULL)) {
371 if (flags & PBF_READ_AHEAD) { 372 if (flags & XBF_READ_AHEAD) {
372 bp->pb_page_count = i; 373 bp->b_page_count = i;
373 for (i = 0; i < bp->pb_page_count; i++) 374 for (i = 0; i < bp->b_page_count; i++)
374 unlock_page(bp->pb_pages[i]); 375 unlock_page(bp->b_pages[i]);
375 return -ENOMEM; 376 return -ENOMEM;
376 } 377 }
377 378
@@ -387,13 +388,13 @@ _pagebuf_lookup_pages(
387 "deadlock in %s (mode:0x%x)\n", 388 "deadlock in %s (mode:0x%x)\n",
388 __FUNCTION__, gfp_mask); 389 __FUNCTION__, gfp_mask);
389 390
390 XFS_STATS_INC(pb_page_retries); 391 XFS_STATS_INC(xb_page_retries);
391 xfsbufd_wakeup(0, gfp_mask); 392 xfsbufd_wakeup(0, gfp_mask);
392 blk_congestion_wait(WRITE, HZ/50); 393 blk_congestion_wait(WRITE, HZ/50);
393 goto retry; 394 goto retry;
394 } 395 }
395 396
396 XFS_STATS_INC(pb_page_found); 397 XFS_STATS_INC(xb_page_found);
397 398
398 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 399 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
399 size -= nbytes; 400 size -= nbytes;
@@ -401,27 +402,27 @@ _pagebuf_lookup_pages(
401 if (!PageUptodate(page)) { 402 if (!PageUptodate(page)) {
402 page_count--; 403 page_count--;
403 if (blocksize >= PAGE_CACHE_SIZE) { 404 if (blocksize >= PAGE_CACHE_SIZE) {
404 if (flags & PBF_READ) 405 if (flags & XBF_READ)
405 bp->pb_locked = 1; 406 bp->b_locked = 1;
406 } else if (!PagePrivate(page)) { 407 } else if (!PagePrivate(page)) {
407 if (test_page_region(page, offset, nbytes)) 408 if (test_page_region(page, offset, nbytes))
408 page_count++; 409 page_count++;
409 } 410 }
410 } 411 }
411 412
412 bp->pb_pages[i] = page; 413 bp->b_pages[i] = page;
413 offset = 0; 414 offset = 0;
414 } 415 }
415 416
416 if (!bp->pb_locked) { 417 if (!bp->b_locked) {
417 for (i = 0; i < bp->pb_page_count; i++) 418 for (i = 0; i < bp->b_page_count; i++)
418 unlock_page(bp->pb_pages[i]); 419 unlock_page(bp->b_pages[i]);
419 } 420 }
420 421
421 if (page_count == bp->pb_page_count) 422 if (page_count == bp->b_page_count)
422 bp->pb_flags |= PBF_DONE; 423 bp->b_flags |= XBF_DONE;
423 424
424 PB_TRACE(bp, "lookup_pages", (long)page_count); 425 XB_TRACE(bp, "lookup_pages", (long)page_count);
425 return error; 426 return error;
426} 427}
427 428
@@ -429,23 +430,23 @@ _pagebuf_lookup_pages(
429 * Map buffer into kernel address-space if nessecary. 430 * Map buffer into kernel address-space if nessecary.
430 */ 431 */
431STATIC int 432STATIC int
432_pagebuf_map_pages( 433_xfs_buf_map_pages(
433 xfs_buf_t *bp, 434 xfs_buf_t *bp,
434 uint flags) 435 uint flags)
435{ 436{
436 /* A single page buffer is always mappable */ 437 /* A single page buffer is always mappable */
437 if (bp->pb_page_count == 1) { 438 if (bp->b_page_count == 1) {
438 bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset; 439 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
439 bp->pb_flags |= PBF_MAPPED; 440 bp->b_flags |= XBF_MAPPED;
440 } else if (flags & PBF_MAPPED) { 441 } else if (flags & XBF_MAPPED) {
441 if (as_list_len > 64) 442 if (as_list_len > 64)
442 purge_addresses(); 443 purge_addresses();
443 bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count, 444 bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
444 VM_MAP, PAGE_KERNEL); 445 VM_MAP, PAGE_KERNEL);
445 if (unlikely(bp->pb_addr == NULL)) 446 if (unlikely(bp->b_addr == NULL))
446 return -ENOMEM; 447 return -ENOMEM;
447 bp->pb_addr += bp->pb_offset; 448 bp->b_addr += bp->b_offset;
448 bp->pb_flags |= PBF_MAPPED; 449 bp->b_flags |= XBF_MAPPED;
449 } 450 }
450 451
451 return 0; 452 return 0;
@@ -456,9 +457,7 @@ _pagebuf_map_pages(
456 */ 457 */
457 458
458/* 459/*
459 * _pagebuf_find 460 * Look up, and creates if absent, a lockable buffer for
460 *
461 * Looks up, and creates if absent, a lockable buffer for
462 * a given range of an inode. The buffer is returned 461 * a given range of an inode. The buffer is returned
463 * locked. If other overlapping buffers exist, they are 462 * locked. If other overlapping buffers exist, they are
464 * released before the new buffer is created and locked, 463 * released before the new buffer is created and locked,
@@ -466,55 +465,55 @@ _pagebuf_map_pages(
466 * are unlocked. No I/O is implied by this call. 465 * are unlocked. No I/O is implied by this call.
467 */ 466 */
468xfs_buf_t * 467xfs_buf_t *
469_pagebuf_find( 468_xfs_buf_find(
470 xfs_buftarg_t *btp, /* block device target */ 469 xfs_buftarg_t *btp, /* block device target */
471 loff_t ioff, /* starting offset of range */ 470 xfs_off_t ioff, /* starting offset of range */
472 size_t isize, /* length of range */ 471 size_t isize, /* length of range */
473 page_buf_flags_t flags, /* PBF_TRYLOCK */ 472 xfs_buf_flags_t flags,
474 xfs_buf_t *new_pb)/* newly allocated buffer */ 473 xfs_buf_t *new_bp)
475{ 474{
476 loff_t range_base; 475 xfs_off_t range_base;
477 size_t range_length; 476 size_t range_length;
478 xfs_bufhash_t *hash; 477 xfs_bufhash_t *hash;
479 xfs_buf_t *pb, *n; 478 xfs_buf_t *bp, *n;
480 479
481 range_base = (ioff << BBSHIFT); 480 range_base = (ioff << BBSHIFT);
482 range_length = (isize << BBSHIFT); 481 range_length = (isize << BBSHIFT);
483 482
484 /* Check for IOs smaller than the sector size / not sector aligned */ 483 /* Check for IOs smaller than the sector size / not sector aligned */
485 ASSERT(!(range_length < (1 << btp->pbr_sshift))); 484 ASSERT(!(range_length < (1 << btp->bt_sshift)));
486 ASSERT(!(range_base & (loff_t)btp->pbr_smask)); 485 ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
487 486
488 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; 487 hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
489 488
490 spin_lock(&hash->bh_lock); 489 spin_lock(&hash->bh_lock);
491 490
492 list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) { 491 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
493 ASSERT(btp == pb->pb_target); 492 ASSERT(btp == bp->b_target);
494 if (pb->pb_file_offset == range_base && 493 if (bp->b_file_offset == range_base &&
495 pb->pb_buffer_length == range_length) { 494 bp->b_buffer_length == range_length) {
496 /* 495 /*
497 * If we look at something bring it to the 496 * If we look at something, bring it to the
498 * front of the list for next time. 497 * front of the list for next time.
499 */ 498 */
500 atomic_inc(&pb->pb_hold); 499 atomic_inc(&bp->b_hold);
501 list_move(&pb->pb_hash_list, &hash->bh_list); 500 list_move(&bp->b_hash_list, &hash->bh_list);
502 goto found; 501 goto found;
503 } 502 }
504 } 503 }
505 504
506 /* No match found */ 505 /* No match found */
507 if (new_pb) { 506 if (new_bp) {
508 _pagebuf_initialize(new_pb, btp, range_base, 507 _xfs_buf_initialize(new_bp, btp, range_base,
509 range_length, flags); 508 range_length, flags);
510 new_pb->pb_hash = hash; 509 new_bp->b_hash = hash;
511 list_add(&new_pb->pb_hash_list, &hash->bh_list); 510 list_add(&new_bp->b_hash_list, &hash->bh_list);
512 } else { 511 } else {
513 XFS_STATS_INC(pb_miss_locked); 512 XFS_STATS_INC(xb_miss_locked);
514 } 513 }
515 514
516 spin_unlock(&hash->bh_lock); 515 spin_unlock(&hash->bh_lock);
517 return new_pb; 516 return new_bp;
518 517
519found: 518found:
520 spin_unlock(&hash->bh_lock); 519 spin_unlock(&hash->bh_lock);
@@ -523,74 +522,72 @@ found:
523 * if this does not work then we need to drop the 522 * if this does not work then we need to drop the
524 * spinlock and do a hard attempt on the semaphore. 523 * spinlock and do a hard attempt on the semaphore.
525 */ 524 */
526 if (down_trylock(&pb->pb_sema)) { 525 if (down_trylock(&bp->b_sema)) {
527 if (!(flags & PBF_TRYLOCK)) { 526 if (!(flags & XBF_TRYLOCK)) {
528 /* wait for buffer ownership */ 527 /* wait for buffer ownership */
529 PB_TRACE(pb, "get_lock", 0); 528 XB_TRACE(bp, "get_lock", 0);
530 pagebuf_lock(pb); 529 xfs_buf_lock(bp);
531 XFS_STATS_INC(pb_get_locked_waited); 530 XFS_STATS_INC(xb_get_locked_waited);
532 } else { 531 } else {
533 /* We asked for a trylock and failed, no need 532 /* We asked for a trylock and failed, no need
534 * to look at file offset and length here, we 533 * to look at file offset and length here, we
535 * know that this pagebuf at least overlaps our 534 * know that this buffer at least overlaps our
536 * pagebuf and is locked, therefore our buffer 535 * buffer and is locked, therefore our buffer
537 * either does not exist, or is this buffer 536 * either does not exist, or is this buffer.
538 */ 537 */
539 538 xfs_buf_rele(bp);
540 pagebuf_rele(pb); 539 XFS_STATS_INC(xb_busy_locked);
541 XFS_STATS_INC(pb_busy_locked); 540 return NULL;
542 return (NULL);
543 } 541 }
544 } else { 542 } else {
545 /* trylock worked */ 543 /* trylock worked */
546 PB_SET_OWNER(pb); 544 XB_SET_OWNER(bp);
547 } 545 }
548 546
549 if (pb->pb_flags & PBF_STALE) { 547 if (bp->b_flags & XBF_STALE) {
550 ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0); 548 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
551 pb->pb_flags &= PBF_MAPPED; 549 bp->b_flags &= XBF_MAPPED;
552 } 550 }
553 PB_TRACE(pb, "got_lock", 0); 551 XB_TRACE(bp, "got_lock", 0);
554 XFS_STATS_INC(pb_get_locked); 552 XFS_STATS_INC(xb_get_locked);
555 return (pb); 553 return bp;
556} 554}
557 555
558/* 556/*
559 * xfs_buf_get_flags assembles a buffer covering the specified range. 557 * Assembles a buffer covering the specified range.
560 *
561 * Storage in memory for all portions of the buffer will be allocated, 558 * Storage in memory for all portions of the buffer will be allocated,
562 * although backing storage may not be. 559 * although backing storage may not be.
563 */ 560 */
564xfs_buf_t * 561xfs_buf_t *
565xfs_buf_get_flags( /* allocate a buffer */ 562xfs_buf_get_flags(
566 xfs_buftarg_t *target,/* target for buffer */ 563 xfs_buftarg_t *target,/* target for buffer */
567 loff_t ioff, /* starting offset of range */ 564 xfs_off_t ioff, /* starting offset of range */
568 size_t isize, /* length of range */ 565 size_t isize, /* length of range */
569 page_buf_flags_t flags) /* PBF_TRYLOCK */ 566 xfs_buf_flags_t flags)
570{ 567{
571 xfs_buf_t *pb, *new_pb; 568 xfs_buf_t *bp, *new_bp;
572 int error = 0, i; 569 int error = 0, i;
573 570
574 new_pb = pagebuf_allocate(flags); 571 new_bp = xfs_buf_allocate(flags);
575 if (unlikely(!new_pb)) 572 if (unlikely(!new_bp))
576 return NULL; 573 return NULL;
577 574
578 pb = _pagebuf_find(target, ioff, isize, flags, new_pb); 575 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
579 if (pb == new_pb) { 576 if (bp == new_bp) {
580 error = _pagebuf_lookup_pages(pb, flags); 577 error = _xfs_buf_lookup_pages(bp, flags);
581 if (error) 578 if (error)
582 goto no_buffer; 579 goto no_buffer;
583 } else { 580 } else {
584 pagebuf_deallocate(new_pb); 581 xfs_buf_deallocate(new_bp);
585 if (unlikely(pb == NULL)) 582 if (unlikely(bp == NULL))
586 return NULL; 583 return NULL;
587 } 584 }
588 585
589 for (i = 0; i < pb->pb_page_count; i++) 586 for (i = 0; i < bp->b_page_count; i++)
590 mark_page_accessed(pb->pb_pages[i]); 587 mark_page_accessed(bp->b_pages[i]);
591 588
592 if (!(pb->pb_flags & PBF_MAPPED)) { 589 if (!(bp->b_flags & XBF_MAPPED)) {
593 error = _pagebuf_map_pages(pb, flags); 590 error = _xfs_buf_map_pages(bp, flags);
594 if (unlikely(error)) { 591 if (unlikely(error)) {
595 printk(KERN_WARNING "%s: failed to map pages\n", 592 printk(KERN_WARNING "%s: failed to map pages\n",
596 __FUNCTION__); 593 __FUNCTION__);
@@ -598,97 +595,97 @@ xfs_buf_get_flags( /* allocate a buffer */
598 } 595 }
599 } 596 }
600 597
601 XFS_STATS_INC(pb_get); 598 XFS_STATS_INC(xb_get);
602 599
603 /* 600 /*
604 * Always fill in the block number now, the mapped cases can do 601 * Always fill in the block number now, the mapped cases can do
605 * their own overlay of this later. 602 * their own overlay of this later.
606 */ 603 */
607 pb->pb_bn = ioff; 604 bp->b_bn = ioff;
608 pb->pb_count_desired = pb->pb_buffer_length; 605 bp->b_count_desired = bp->b_buffer_length;
609 606
610 PB_TRACE(pb, "get", (unsigned long)flags); 607 XB_TRACE(bp, "get", (unsigned long)flags);
611 return pb; 608 return bp;
612 609
613 no_buffer: 610 no_buffer:
614 if (flags & (PBF_LOCK | PBF_TRYLOCK)) 611 if (flags & (XBF_LOCK | XBF_TRYLOCK))
615 pagebuf_unlock(pb); 612 xfs_buf_unlock(bp);
616 pagebuf_rele(pb); 613 xfs_buf_rele(bp);
617 return NULL; 614 return NULL;
618} 615}
619 616
620xfs_buf_t * 617xfs_buf_t *
621xfs_buf_read_flags( 618xfs_buf_read_flags(
622 xfs_buftarg_t *target, 619 xfs_buftarg_t *target,
623 loff_t ioff, 620 xfs_off_t ioff,
624 size_t isize, 621 size_t isize,
625 page_buf_flags_t flags) 622 xfs_buf_flags_t flags)
626{ 623{
627 xfs_buf_t *pb; 624 xfs_buf_t *bp;
628 625
629 flags |= PBF_READ; 626 flags |= XBF_READ;
630 627
631 pb = xfs_buf_get_flags(target, ioff, isize, flags); 628 bp = xfs_buf_get_flags(target, ioff, isize, flags);
632 if (pb) { 629 if (bp) {
633 if (!XFS_BUF_ISDONE(pb)) { 630 if (!XFS_BUF_ISDONE(bp)) {
634 PB_TRACE(pb, "read", (unsigned long)flags); 631 XB_TRACE(bp, "read", (unsigned long)flags);
635 XFS_STATS_INC(pb_get_read); 632 XFS_STATS_INC(xb_get_read);
636 pagebuf_iostart(pb, flags); 633 xfs_buf_iostart(bp, flags);
637 } else if (flags & PBF_ASYNC) { 634 } else if (flags & XBF_ASYNC) {
638 PB_TRACE(pb, "read_async", (unsigned long)flags); 635 XB_TRACE(bp, "read_async", (unsigned long)flags);
639 /* 636 /*
640 * Read ahead call which is already satisfied, 637 * Read ahead call which is already satisfied,
641 * drop the buffer 638 * drop the buffer
642 */ 639 */
643 goto no_buffer; 640 goto no_buffer;
644 } else { 641 } else {
645 PB_TRACE(pb, "read_done", (unsigned long)flags); 642 XB_TRACE(bp, "read_done", (unsigned long)flags);
646 /* We do not want read in the flags */ 643 /* We do not want read in the flags */
647 pb->pb_flags &= ~PBF_READ; 644 bp->b_flags &= ~XBF_READ;
648 } 645 }
649 } 646 }
650 647
651 return pb; 648 return bp;
652 649
653 no_buffer: 650 no_buffer:
654 if (flags & (PBF_LOCK | PBF_TRYLOCK)) 651 if (flags & (XBF_LOCK | XBF_TRYLOCK))
655 pagebuf_unlock(pb); 652 xfs_buf_unlock(bp);
656 pagebuf_rele(pb); 653 xfs_buf_rele(bp);
657 return NULL; 654 return NULL;
658} 655}
659 656
660/* 657/*
661 * If we are not low on memory then do the readahead in a deadlock 658 * If we are not low on memory then do the readahead in a deadlock
662 * safe manner. 659 * safe manner.
663 */ 660 */
664void 661void
665pagebuf_readahead( 662xfs_buf_readahead(
666 xfs_buftarg_t *target, 663 xfs_buftarg_t *target,
667 loff_t ioff, 664 xfs_off_t ioff,
668 size_t isize, 665 size_t isize,
669 page_buf_flags_t flags) 666 xfs_buf_flags_t flags)
670{ 667{
671 struct backing_dev_info *bdi; 668 struct backing_dev_info *bdi;
672 669
673 bdi = target->pbr_mapping->backing_dev_info; 670 bdi = target->bt_mapping->backing_dev_info;
674 if (bdi_read_congested(bdi)) 671 if (bdi_read_congested(bdi))
675 return; 672 return;
676 673
677 flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD); 674 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
678 xfs_buf_read_flags(target, ioff, isize, flags); 675 xfs_buf_read_flags(target, ioff, isize, flags);
679} 676}
680 677
681xfs_buf_t * 678xfs_buf_t *
682pagebuf_get_empty( 679xfs_buf_get_empty(
683 size_t len, 680 size_t len,
684 xfs_buftarg_t *target) 681 xfs_buftarg_t *target)
685{ 682{
686 xfs_buf_t *pb; 683 xfs_buf_t *bp;
687 684
688 pb = pagebuf_allocate(0); 685 bp = xfs_buf_allocate(0);
689 if (pb) 686 if (bp)
690 _pagebuf_initialize(pb, target, 0, len, 0); 687 _xfs_buf_initialize(bp, target, 0, len, 0);
691 return pb; 688 return bp;
692} 689}
693 690
694static inline struct page * 691static inline struct page *
@@ -704,8 +701,8 @@ mem_to_page(
704} 701}
705 702
706int 703int
707pagebuf_associate_memory( 704xfs_buf_associate_memory(
708 xfs_buf_t *pb, 705 xfs_buf_t *bp,
709 void *mem, 706 void *mem,
710 size_t len) 707 size_t len)
711{ 708{
@@ -722,40 +719,40 @@ pagebuf_associate_memory(
722 page_count++; 719 page_count++;
723 720
724 /* Free any previous set of page pointers */ 721 /* Free any previous set of page pointers */
725 if (pb->pb_pages) 722 if (bp->b_pages)
726 _pagebuf_free_pages(pb); 723 _xfs_buf_free_pages(bp);
727 724
728 pb->pb_pages = NULL; 725 bp->b_pages = NULL;
729 pb->pb_addr = mem; 726 bp->b_addr = mem;
730 727
731 rval = _pagebuf_get_pages(pb, page_count, 0); 728 rval = _xfs_buf_get_pages(bp, page_count, 0);
732 if (rval) 729 if (rval)
733 return rval; 730 return rval;
734 731
735 pb->pb_offset = offset; 732 bp->b_offset = offset;
736 ptr = (size_t) mem & PAGE_CACHE_MASK; 733 ptr = (size_t) mem & PAGE_CACHE_MASK;
737 end = PAGE_CACHE_ALIGN((size_t) mem + len); 734 end = PAGE_CACHE_ALIGN((size_t) mem + len);
738 end_cur = end; 735 end_cur = end;
739 /* set up first page */ 736 /* set up first page */
740 pb->pb_pages[0] = mem_to_page(mem); 737 bp->b_pages[0] = mem_to_page(mem);
741 738
742 ptr += PAGE_CACHE_SIZE; 739 ptr += PAGE_CACHE_SIZE;
743 pb->pb_page_count = ++i; 740 bp->b_page_count = ++i;
744 while (ptr < end) { 741 while (ptr < end) {
745 pb->pb_pages[i] = mem_to_page((void *)ptr); 742 bp->b_pages[i] = mem_to_page((void *)ptr);
746 pb->pb_page_count = ++i; 743 bp->b_page_count = ++i;
747 ptr += PAGE_CACHE_SIZE; 744 ptr += PAGE_CACHE_SIZE;
748 } 745 }
749 pb->pb_locked = 0; 746 bp->b_locked = 0;
750 747
751 pb->pb_count_desired = pb->pb_buffer_length = len; 748 bp->b_count_desired = bp->b_buffer_length = len;
752 pb->pb_flags |= PBF_MAPPED; 749 bp->b_flags |= XBF_MAPPED;
753 750
754 return 0; 751 return 0;
755} 752}
756 753
757xfs_buf_t * 754xfs_buf_t *
758pagebuf_get_no_daddr( 755xfs_buf_get_noaddr(
759 size_t len, 756 size_t len,
760 xfs_buftarg_t *target) 757 xfs_buftarg_t *target)
761{ 758{
@@ -764,10 +761,10 @@ pagebuf_get_no_daddr(
764 void *data; 761 void *data;
765 int error; 762 int error;
766 763
767 bp = pagebuf_allocate(0); 764 bp = xfs_buf_allocate(0);
768 if (unlikely(bp == NULL)) 765 if (unlikely(bp == NULL))
769 goto fail; 766 goto fail;
770 _pagebuf_initialize(bp, target, 0, len, 0); 767 _xfs_buf_initialize(bp, target, 0, len, 0);
771 768
772 try_again: 769 try_again:
773 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL); 770 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
@@ -776,78 +773,73 @@ pagebuf_get_no_daddr(
776 773
777 /* check whether alignment matches.. */ 774 /* check whether alignment matches.. */
778 if ((__psunsigned_t)data != 775 if ((__psunsigned_t)data !=
779 ((__psunsigned_t)data & ~target->pbr_smask)) { 776 ((__psunsigned_t)data & ~target->bt_smask)) {
780 /* .. else double the size and try again */ 777 /* .. else double the size and try again */
781 kmem_free(data, malloc_len); 778 kmem_free(data, malloc_len);
782 malloc_len <<= 1; 779 malloc_len <<= 1;
783 goto try_again; 780 goto try_again;
784 } 781 }
785 782
786 error = pagebuf_associate_memory(bp, data, len); 783 error = xfs_buf_associate_memory(bp, data, len);
787 if (error) 784 if (error)
788 goto fail_free_mem; 785 goto fail_free_mem;
789 bp->pb_flags |= _PBF_KMEM_ALLOC; 786 bp->b_flags |= _XBF_KMEM_ALLOC;
790 787
791 pagebuf_unlock(bp); 788 xfs_buf_unlock(bp);
792 789
793 PB_TRACE(bp, "no_daddr", data); 790 XB_TRACE(bp, "no_daddr", data);
794 return bp; 791 return bp;
795 fail_free_mem: 792 fail_free_mem:
796 kmem_free(data, malloc_len); 793 kmem_free(data, malloc_len);
797 fail_free_buf: 794 fail_free_buf:
798 pagebuf_free(bp); 795 xfs_buf_free(bp);
799 fail: 796 fail:
800 return NULL; 797 return NULL;
801} 798}
802 799
803/* 800/*
804 * pagebuf_hold
805 *
806 * Increment reference count on buffer, to hold the buffer concurrently 801 * Increment reference count on buffer, to hold the buffer concurrently
807 * with another thread which may release (free) the buffer asynchronously. 802 * with another thread which may release (free) the buffer asynchronously.
808 *
809 * Must hold the buffer already to call this function. 803 * Must hold the buffer already to call this function.
810 */ 804 */
811void 805void
812pagebuf_hold( 806xfs_buf_hold(
813 xfs_buf_t *pb) 807 xfs_buf_t *bp)
814{ 808{
815 atomic_inc(&pb->pb_hold); 809 atomic_inc(&bp->b_hold);
816 PB_TRACE(pb, "hold", 0); 810 XB_TRACE(bp, "hold", 0);
817} 811}
818 812
819/* 813/*
820 * pagebuf_rele 814 * Releases a hold on the specified buffer. If the
821 * 815 * the hold count is 1, calls xfs_buf_free.
822 * pagebuf_rele releases a hold on the specified buffer. If the
823 * the hold count is 1, pagebuf_rele calls pagebuf_free.
824 */ 816 */
825void 817void
826pagebuf_rele( 818xfs_buf_rele(
827 xfs_buf_t *pb) 819 xfs_buf_t *bp)
828{ 820{
829 xfs_bufhash_t *hash = pb->pb_hash; 821 xfs_bufhash_t *hash = bp->b_hash;
830 822
831 PB_TRACE(pb, "rele", pb->pb_relse); 823 XB_TRACE(bp, "rele", bp->b_relse);
832 824
833 if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) { 825 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
834 if (pb->pb_relse) { 826 if (bp->b_relse) {
835 atomic_inc(&pb->pb_hold); 827 atomic_inc(&bp->b_hold);
836 spin_unlock(&hash->bh_lock); 828 spin_unlock(&hash->bh_lock);
837 (*(pb->pb_relse)) (pb); 829 (*(bp->b_relse)) (bp);
838 } else if (pb->pb_flags & PBF_FS_MANAGED) { 830 } else if (bp->b_flags & XBF_FS_MANAGED) {
839 spin_unlock(&hash->bh_lock); 831 spin_unlock(&hash->bh_lock);
840 } else { 832 } else {
841 ASSERT(!(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q))); 833 ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
842 list_del_init(&pb->pb_hash_list); 834 list_del_init(&bp->b_hash_list);
843 spin_unlock(&hash->bh_lock); 835 spin_unlock(&hash->bh_lock);
844 pagebuf_free(pb); 836 xfs_buf_free(bp);
845 } 837 }
846 } else { 838 } else {
847 /* 839 /*
848 * Catch reference count leaks 840 * Catch reference count leaks
849 */ 841 */
850 ASSERT(atomic_read(&pb->pb_hold) >= 0); 842 ASSERT(atomic_read(&bp->b_hold) >= 0);
851 } 843 }
852} 844}
853 845
@@ -863,168 +855,122 @@ pagebuf_rele(
863 */ 855 */
864 856
865/* 857/*
866 * pagebuf_cond_lock 858 * Locks a buffer object, if it is not already locked.
867 * 859 * Note that this in no way locks the underlying pages, so it is only
868 * pagebuf_cond_lock locks a buffer object, if it is not already locked. 860 * useful for synchronizing concurrent use of buffer objects, not for
869 * Note that this in no way 861 * synchronizing independent access to the underlying pages.
870 * locks the underlying pages, so it is only useful for synchronizing
871 * concurrent use of page buffer objects, not for synchronizing independent
872 * access to the underlying pages.
873 */ 862 */
874int 863int
875pagebuf_cond_lock( /* lock buffer, if not locked */ 864xfs_buf_cond_lock(
876 /* returns -EBUSY if locked) */ 865 xfs_buf_t *bp)
877 xfs_buf_t *pb)
878{ 866{
879 int locked; 867 int locked;
880 868
881 locked = down_trylock(&pb->pb_sema) == 0; 869 locked = down_trylock(&bp->b_sema) == 0;
882 if (locked) { 870 if (locked) {
883 PB_SET_OWNER(pb); 871 XB_SET_OWNER(bp);
884 } 872 }
885 PB_TRACE(pb, "cond_lock", (long)locked); 873 XB_TRACE(bp, "cond_lock", (long)locked);
886 return(locked ? 0 : -EBUSY); 874 return locked ? 0 : -EBUSY;
887} 875}
888 876
889#if defined(DEBUG) || defined(XFS_BLI_TRACE) 877#if defined(DEBUG) || defined(XFS_BLI_TRACE)
890/*
891 * pagebuf_lock_value
892 *
893 * Return lock value for a pagebuf
894 */
895int 878int
896pagebuf_lock_value( 879xfs_buf_lock_value(
897 xfs_buf_t *pb) 880 xfs_buf_t *bp)
898{ 881{
899 return(atomic_read(&pb->pb_sema.count)); 882 return atomic_read(&bp->b_sema.count);
900} 883}
901#endif 884#endif
902 885
903/* 886/*
904 * pagebuf_lock 887 * Locks a buffer object.
905 * 888 * Note that this in no way locks the underlying pages, so it is only
906 * pagebuf_lock locks a buffer object. Note that this in no way 889 * useful for synchronizing concurrent use of buffer objects, not for
907 * locks the underlying pages, so it is only useful for synchronizing 890 * synchronizing independent access to the underlying pages.
908 * concurrent use of page buffer objects, not for synchronizing independent
909 * access to the underlying pages.
910 */ 891 */
911int 892void
912pagebuf_lock( 893xfs_buf_lock(
913 xfs_buf_t *pb) 894 xfs_buf_t *bp)
914{ 895{
915 PB_TRACE(pb, "lock", 0); 896 XB_TRACE(bp, "lock", 0);
916 if (atomic_read(&pb->pb_io_remaining)) 897 if (atomic_read(&bp->b_io_remaining))
917 blk_run_address_space(pb->pb_target->pbr_mapping); 898 blk_run_address_space(bp->b_target->bt_mapping);
918 down(&pb->pb_sema); 899 down(&bp->b_sema);
919 PB_SET_OWNER(pb); 900 XB_SET_OWNER(bp);
920 PB_TRACE(pb, "locked", 0); 901 XB_TRACE(bp, "locked", 0);
921 return 0;
922} 902}
923 903
924/* 904/*
925 * pagebuf_unlock 905 * Releases the lock on the buffer object.
926 *
927 * pagebuf_unlock releases the lock on the buffer object created by
928 * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
929 * created by pagebuf_pin).
930 *
931 * If the buffer is marked delwri but is not queued, do so before we 906 * If the buffer is marked delwri but is not queued, do so before we
932 * unlock the buffer as we need to set flags correctly. We also need to 907 * unlock the buffer as we need to set flags correctly. We also need to
933 * take a reference for the delwri queue because the unlocker is going to 908 * take a reference for the delwri queue because the unlocker is going to
934 * drop their's and they don't know we just queued it. 909 * drop their's and they don't know we just queued it.
935 */ 910 */
936void 911void
937pagebuf_unlock( /* unlock buffer */ 912xfs_buf_unlock(
938 xfs_buf_t *pb) /* buffer to unlock */ 913 xfs_buf_t *bp)
939{ 914{
940 if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) { 915 if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
941 atomic_inc(&pb->pb_hold); 916 atomic_inc(&bp->b_hold);
942 pb->pb_flags |= PBF_ASYNC; 917 bp->b_flags |= XBF_ASYNC;
943 pagebuf_delwri_queue(pb, 0); 918 xfs_buf_delwri_queue(bp, 0);
944 } 919 }
945 920
946 PB_CLEAR_OWNER(pb); 921 XB_CLEAR_OWNER(bp);
947 up(&pb->pb_sema); 922 up(&bp->b_sema);
948 PB_TRACE(pb, "unlock", 0); 923 XB_TRACE(bp, "unlock", 0);
949} 924}
950 925
951 926
952/* 927/*
953 * Pinning Buffer Storage in Memory 928 * Pinning Buffer Storage in Memory
954 */ 929 * Ensure that no attempt to force a buffer to disk will succeed.
955
956/*
957 * pagebuf_pin
958 *
959 * pagebuf_pin locks all of the memory represented by a buffer in
960 * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
961 * the same or different buffers affecting a given page, will
962 * properly count the number of outstanding "pin" requests. The
963 * buffer may be released after the pagebuf_pin and a different
964 * buffer used when calling pagebuf_unpin, if desired.
965 * pagebuf_pin should be used by the file system when it wants be
966 * assured that no attempt will be made to force the affected
967 * memory to disk. It does not assure that a given logical page
968 * will not be moved to a different physical page.
969 */ 930 */
970void 931void
971pagebuf_pin( 932xfs_buf_pin(
972 xfs_buf_t *pb) 933 xfs_buf_t *bp)
973{ 934{
974 atomic_inc(&pb->pb_pin_count); 935 atomic_inc(&bp->b_pin_count);
975 PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); 936 XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
976} 937}
977 938
978/*
979 * pagebuf_unpin
980 *
981 * pagebuf_unpin reverses the locking of memory performed by
982 * pagebuf_pin. Note that both functions affected the logical
983 * pages associated with the buffer, not the buffer itself.
984 */
985void 939void
986pagebuf_unpin( 940xfs_buf_unpin(
987 xfs_buf_t *pb) 941 xfs_buf_t *bp)
988{ 942{
989 if (atomic_dec_and_test(&pb->pb_pin_count)) { 943 if (atomic_dec_and_test(&bp->b_pin_count))
990 wake_up_all(&pb->pb_waiters); 944 wake_up_all(&bp->b_waiters);
991 } 945 XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
992 PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
993} 946}
994 947
995int 948int
996pagebuf_ispin( 949xfs_buf_ispin(
997 xfs_buf_t *pb) 950 xfs_buf_t *bp)
998{ 951{
999 return atomic_read(&pb->pb_pin_count); 952 return atomic_read(&bp->b_pin_count);
1000} 953}
1001 954
1002/* 955STATIC void
1003 * pagebuf_wait_unpin 956xfs_buf_wait_unpin(
1004 * 957 xfs_buf_t *bp)
1005 * pagebuf_wait_unpin waits until all of the memory associated
1006 * with the buffer is not longer locked in memory. It returns
1007 * immediately if none of the affected pages are locked.
1008 */
1009static inline void
1010_pagebuf_wait_unpin(
1011 xfs_buf_t *pb)
1012{ 958{
1013 DECLARE_WAITQUEUE (wait, current); 959 DECLARE_WAITQUEUE (wait, current);
1014 960
1015 if (atomic_read(&pb->pb_pin_count) == 0) 961 if (atomic_read(&bp->b_pin_count) == 0)
1016 return; 962 return;
1017 963
1018 add_wait_queue(&pb->pb_waiters, &wait); 964 add_wait_queue(&bp->b_waiters, &wait);
1019 for (;;) { 965 for (;;) {
1020 set_current_state(TASK_UNINTERRUPTIBLE); 966 set_current_state(TASK_UNINTERRUPTIBLE);
1021 if (atomic_read(&pb->pb_pin_count) == 0) 967 if (atomic_read(&bp->b_pin_count) == 0)
1022 break; 968 break;
1023 if (atomic_read(&pb->pb_io_remaining)) 969 if (atomic_read(&bp->b_io_remaining))
1024 blk_run_address_space(pb->pb_target->pbr_mapping); 970 blk_run_address_space(bp->b_target->bt_mapping);
1025 schedule(); 971 schedule();
1026 } 972 }
1027 remove_wait_queue(&pb->pb_waiters, &wait); 973 remove_wait_queue(&bp->b_waiters, &wait);
1028 set_current_state(TASK_RUNNING); 974 set_current_state(TASK_RUNNING);
1029} 975}
1030 976
@@ -1032,241 +978,216 @@ _pagebuf_wait_unpin(
1032 * Buffer Utility Routines 978 * Buffer Utility Routines
1033 */ 979 */
1034 980
1035/*
1036 * pagebuf_iodone
1037 *
1038 * pagebuf_iodone marks a buffer for which I/O is in progress
1039 * done with respect to that I/O. The pb_iodone routine, if
1040 * present, will be called as a side-effect.
1041 */
1042STATIC void 981STATIC void
1043pagebuf_iodone_work( 982xfs_buf_iodone_work(
1044 void *v) 983 void *v)
1045{ 984{
1046 xfs_buf_t *bp = (xfs_buf_t *)v; 985 xfs_buf_t *bp = (xfs_buf_t *)v;
1047 986
1048 if (bp->pb_iodone) 987 if (bp->b_iodone)
1049 (*(bp->pb_iodone))(bp); 988 (*(bp->b_iodone))(bp);
1050 else if (bp->pb_flags & PBF_ASYNC) 989 else if (bp->b_flags & XBF_ASYNC)
1051 xfs_buf_relse(bp); 990 xfs_buf_relse(bp);
1052} 991}
1053 992
1054void 993void
1055pagebuf_iodone( 994xfs_buf_ioend(
1056 xfs_buf_t *pb, 995 xfs_buf_t *bp,
1057 int schedule) 996 int schedule)
1058{ 997{
1059 pb->pb_flags &= ~(PBF_READ | PBF_WRITE); 998 bp->b_flags &= ~(XBF_READ | XBF_WRITE);
1060 if (pb->pb_error == 0) 999 if (bp->b_error == 0)
1061 pb->pb_flags |= PBF_DONE; 1000 bp->b_flags |= XBF_DONE;
1062 1001
1063 PB_TRACE(pb, "iodone", pb->pb_iodone); 1002 XB_TRACE(bp, "iodone", bp->b_iodone);
1064 1003
1065 if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { 1004 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
1066 if (schedule) { 1005 if (schedule) {
1067 INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); 1006 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
1068 queue_work(xfslogd_workqueue, &pb->pb_iodone_work); 1007 queue_work(xfslogd_workqueue, &bp->b_iodone_work);
1069 } else { 1008 } else {
1070 pagebuf_iodone_work(pb); 1009 xfs_buf_iodone_work(bp);
1071 } 1010 }
1072 } else { 1011 } else {
1073 up(&pb->pb_iodonesema); 1012 up(&bp->b_iodonesema);
1074 } 1013 }
1075} 1014}
1076 1015
1077/*
1078 * pagebuf_ioerror
1079 *
1080 * pagebuf_ioerror sets the error code for a buffer.
1081 */
1082void 1016void
1083pagebuf_ioerror( /* mark/clear buffer error flag */ 1017xfs_buf_ioerror(
1084 xfs_buf_t *pb, /* buffer to mark */ 1018 xfs_buf_t *bp,
1085 int error) /* error to store (0 if none) */ 1019 int error)
1086{ 1020{
1087 ASSERT(error >= 0 && error <= 0xffff); 1021 ASSERT(error >= 0 && error <= 0xffff);
1088 pb->pb_error = (unsigned short)error; 1022 bp->b_error = (unsigned short)error;
1089 PB_TRACE(pb, "ioerror", (unsigned long)error); 1023 XB_TRACE(bp, "ioerror", (unsigned long)error);
1090} 1024}
1091 1025
1092/* 1026/*
1093 * pagebuf_iostart 1027 * Initiate I/O on a buffer, based on the flags supplied.
1094 * 1028 * The b_iodone routine in the buffer supplied will only be called
1095 * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
1096 * If necessary, it will arrange for any disk space allocation required,
1097 * and it will break up the request if the block mappings require it.
1098 * The pb_iodone routine in the buffer supplied will only be called
1099 * when all of the subsidiary I/O requests, if any, have been completed. 1029 * when all of the subsidiary I/O requests, if any, have been completed.
1100 * pagebuf_iostart calls the pagebuf_ioinitiate routine or
1101 * pagebuf_iorequest, if the former routine is not defined, to start
1102 * the I/O on a given low-level request.
1103 */ 1030 */
1104int 1031int
1105pagebuf_iostart( /* start I/O on a buffer */ 1032xfs_buf_iostart(
1106 xfs_buf_t *pb, /* buffer to start */ 1033 xfs_buf_t *bp,
1107 page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ 1034 xfs_buf_flags_t flags)
1108 /* PBF_WRITE, PBF_DELWRI, */
1109 /* PBF_DONT_BLOCK */
1110{ 1035{
1111 int status = 0; 1036 int status = 0;
1112 1037
1113 PB_TRACE(pb, "iostart", (unsigned long)flags); 1038 XB_TRACE(bp, "iostart", (unsigned long)flags);
1114 1039
1115 if (flags & PBF_DELWRI) { 1040 if (flags & XBF_DELWRI) {
1116 pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); 1041 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
1117 pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC); 1042 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
1118 pagebuf_delwri_queue(pb, 1); 1043 xfs_buf_delwri_queue(bp, 1);
1119 return status; 1044 return status;
1120 } 1045 }
1121 1046
1122 pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ 1047 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
1123 PBF_READ_AHEAD | _PBF_RUN_QUEUES); 1048 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
1124 pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ 1049 bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
1125 PBF_READ_AHEAD | _PBF_RUN_QUEUES); 1050 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
1126 1051
1127 BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); 1052 BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
1128 1053
1129 /* For writes allow an alternate strategy routine to precede 1054 /* For writes allow an alternate strategy routine to precede
1130 * the actual I/O request (which may not be issued at all in 1055 * the actual I/O request (which may not be issued at all in
1131 * a shutdown situation, for example). 1056 * a shutdown situation, for example).
1132 */ 1057 */
1133 status = (flags & PBF_WRITE) ? 1058 status = (flags & XBF_WRITE) ?
1134 pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); 1059 xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
1135 1060
1136 /* Wait for I/O if we are not an async request. 1061 /* Wait for I/O if we are not an async request.
1137 * Note: async I/O request completion will release the buffer, 1062 * Note: async I/O request completion will release the buffer,
1138 * and that can already be done by this point. So using the 1063 * and that can already be done by this point. So using the
1139 * buffer pointer from here on, after async I/O, is invalid. 1064 * buffer pointer from here on, after async I/O, is invalid.
1140 */ 1065 */
1141 if (!status && !(flags & PBF_ASYNC)) 1066 if (!status && !(flags & XBF_ASYNC))
1142 status = pagebuf_iowait(pb); 1067 status = xfs_buf_iowait(bp);
1143 1068
1144 return status; 1069 return status;
1145} 1070}
1146 1071
1147/*
1148 * Helper routine for pagebuf_iorequest
1149 */
1150
1151STATIC __inline__ int 1072STATIC __inline__ int
1152_pagebuf_iolocked( 1073_xfs_buf_iolocked(
1153 xfs_buf_t *pb) 1074 xfs_buf_t *bp)
1154{ 1075{
1155 ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); 1076 ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
1156 if (pb->pb_flags & PBF_READ) 1077 if (bp->b_flags & XBF_READ)
1157 return pb->pb_locked; 1078 return bp->b_locked;
1158 return 0; 1079 return 0;
1159} 1080}
1160 1081
1161STATIC __inline__ void 1082STATIC __inline__ void
1162_pagebuf_iodone( 1083_xfs_buf_ioend(
1163 xfs_buf_t *pb, 1084 xfs_buf_t *bp,
1164 int schedule) 1085 int schedule)
1165{ 1086{
1166 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { 1087 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1167 pb->pb_locked = 0; 1088 bp->b_locked = 0;
1168 pagebuf_iodone(pb, schedule); 1089 xfs_buf_ioend(bp, schedule);
1169 } 1090 }
1170} 1091}
1171 1092
1172STATIC int 1093STATIC int
1173bio_end_io_pagebuf( 1094xfs_buf_bio_end_io(
1174 struct bio *bio, 1095 struct bio *bio,
1175 unsigned int bytes_done, 1096 unsigned int bytes_done,
1176 int error) 1097 int error)
1177{ 1098{
1178 xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; 1099 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1179 unsigned int blocksize = pb->pb_target->pbr_bsize; 1100 unsigned int blocksize = bp->b_target->bt_bsize;
1180 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1101 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1181 1102
1182 if (bio->bi_size) 1103 if (bio->bi_size)
1183 return 1; 1104 return 1;
1184 1105
1185 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1106 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1186 pb->pb_error = EIO; 1107 bp->b_error = EIO;
1187 1108
1188 do { 1109 do {
1189 struct page *page = bvec->bv_page; 1110 struct page *page = bvec->bv_page;
1190 1111
1191 if (unlikely(pb->pb_error)) { 1112 if (unlikely(bp->b_error)) {
1192 if (pb->pb_flags & PBF_READ) 1113 if (bp->b_flags & XBF_READ)
1193 ClearPageUptodate(page); 1114 ClearPageUptodate(page);
1194 SetPageError(page); 1115 SetPageError(page);
1195 } else if (blocksize == PAGE_CACHE_SIZE) { 1116 } else if (blocksize >= PAGE_CACHE_SIZE) {
1196 SetPageUptodate(page); 1117 SetPageUptodate(page);
1197 } else if (!PagePrivate(page) && 1118 } else if (!PagePrivate(page) &&
1198 (pb->pb_flags & _PBF_PAGE_CACHE)) { 1119 (bp->b_flags & _XBF_PAGE_CACHE)) {
1199 set_page_region(page, bvec->bv_offset, bvec->bv_len); 1120 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1200 } 1121 }
1201 1122
1202 if (--bvec >= bio->bi_io_vec) 1123 if (--bvec >= bio->bi_io_vec)
1203 prefetchw(&bvec->bv_page->flags); 1124 prefetchw(&bvec->bv_page->flags);
1204 1125
1205 if (_pagebuf_iolocked(pb)) { 1126 if (_xfs_buf_iolocked(bp)) {
1206 unlock_page(page); 1127 unlock_page(page);
1207 } 1128 }
1208 } while (bvec >= bio->bi_io_vec); 1129 } while (bvec >= bio->bi_io_vec);
1209 1130
1210 _pagebuf_iodone(pb, 1); 1131 _xfs_buf_ioend(bp, 1);
1211 bio_put(bio); 1132 bio_put(bio);
1212 return 0; 1133 return 0;
1213} 1134}
1214 1135
1215STATIC void 1136STATIC void
1216_pagebuf_ioapply( 1137_xfs_buf_ioapply(
1217 xfs_buf_t *pb) 1138 xfs_buf_t *bp)
1218{ 1139{
1219 int i, rw, map_i, total_nr_pages, nr_pages; 1140 int i, rw, map_i, total_nr_pages, nr_pages;
1220 struct bio *bio; 1141 struct bio *bio;
1221 int offset = pb->pb_offset; 1142 int offset = bp->b_offset;
1222 int size = pb->pb_count_desired; 1143 int size = bp->b_count_desired;
1223 sector_t sector = pb->pb_bn; 1144 sector_t sector = bp->b_bn;
1224 unsigned int blocksize = pb->pb_target->pbr_bsize; 1145 unsigned int blocksize = bp->b_target->bt_bsize;
1225 int locking = _pagebuf_iolocked(pb); 1146 int locking = _xfs_buf_iolocked(bp);
1226 1147
1227 total_nr_pages = pb->pb_page_count; 1148 total_nr_pages = bp->b_page_count;
1228 map_i = 0; 1149 map_i = 0;
1229 1150
1230 if (pb->pb_flags & _PBF_RUN_QUEUES) { 1151 if (bp->b_flags & _XBF_RUN_QUEUES) {
1231 pb->pb_flags &= ~_PBF_RUN_QUEUES; 1152 bp->b_flags &= ~_XBF_RUN_QUEUES;
1232 rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC; 1153 rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
1233 } else { 1154 } else {
1234 rw = (pb->pb_flags & PBF_READ) ? READ : WRITE; 1155 rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
1235 } 1156 }
1236 1157
1237 if (pb->pb_flags & PBF_ORDERED) { 1158 if (bp->b_flags & XBF_ORDERED) {
1238 ASSERT(!(pb->pb_flags & PBF_READ)); 1159 ASSERT(!(bp->b_flags & XBF_READ));
1239 rw = WRITE_BARRIER; 1160 rw = WRITE_BARRIER;
1240 } 1161 }
1241 1162
1242 /* Special code path for reading a sub page size pagebuf in -- 1163 /* Special code path for reading a sub page size buffer in --
1243 * we populate up the whole page, and hence the other metadata 1164 * we populate up the whole page, and hence the other metadata
1244 * in the same page. This optimization is only valid when the 1165 * in the same page. This optimization is only valid when the
1245 * filesystem block size and the page size are equal. 1166 * filesystem block size is not smaller than the page size.
1246 */ 1167 */
1247 if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && 1168 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1248 (pb->pb_flags & PBF_READ) && locking && 1169 (bp->b_flags & XBF_READ) && locking &&
1249 (blocksize == PAGE_CACHE_SIZE)) { 1170 (blocksize >= PAGE_CACHE_SIZE)) {
1250 bio = bio_alloc(GFP_NOIO, 1); 1171 bio = bio_alloc(GFP_NOIO, 1);
1251 1172
1252 bio->bi_bdev = pb->pb_target->pbr_bdev; 1173 bio->bi_bdev = bp->b_target->bt_bdev;
1253 bio->bi_sector = sector - (offset >> BBSHIFT); 1174 bio->bi_sector = sector - (offset >> BBSHIFT);
1254 bio->bi_end_io = bio_end_io_pagebuf; 1175 bio->bi_end_io = xfs_buf_bio_end_io;
1255 bio->bi_private = pb; 1176 bio->bi_private = bp;
1256 1177
1257 bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); 1178 bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
1258 size = 0; 1179 size = 0;
1259 1180
1260 atomic_inc(&pb->pb_io_remaining); 1181 atomic_inc(&bp->b_io_remaining);
1261 1182
1262 goto submit_io; 1183 goto submit_io;
1263 } 1184 }
1264 1185
1265 /* Lock down the pages which we need to for the request */ 1186 /* Lock down the pages which we need to for the request */
1266 if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { 1187 if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
1267 for (i = 0; size; i++) { 1188 for (i = 0; size; i++) {
1268 int nbytes = PAGE_CACHE_SIZE - offset; 1189 int nbytes = PAGE_CACHE_SIZE - offset;
1269 struct page *page = pb->pb_pages[i]; 1190 struct page *page = bp->b_pages[i];
1270 1191
1271 if (nbytes > size) 1192 if (nbytes > size)
1272 nbytes = size; 1193 nbytes = size;
@@ -1276,30 +1197,30 @@ _pagebuf_ioapply(
1276 size -= nbytes; 1197 size -= nbytes;
1277 offset = 0; 1198 offset = 0;
1278 } 1199 }
1279 offset = pb->pb_offset; 1200 offset = bp->b_offset;
1280 size = pb->pb_count_desired; 1201 size = bp->b_count_desired;
1281 } 1202 }
1282 1203
1283next_chunk: 1204next_chunk:
1284 atomic_inc(&pb->pb_io_remaining); 1205 atomic_inc(&bp->b_io_remaining);
1285 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); 1206 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
1286 if (nr_pages > total_nr_pages) 1207 if (nr_pages > total_nr_pages)
1287 nr_pages = total_nr_pages; 1208 nr_pages = total_nr_pages;
1288 1209
1289 bio = bio_alloc(GFP_NOIO, nr_pages); 1210 bio = bio_alloc(GFP_NOIO, nr_pages);
1290 bio->bi_bdev = pb->pb_target->pbr_bdev; 1211 bio->bi_bdev = bp->b_target->bt_bdev;
1291 bio->bi_sector = sector; 1212 bio->bi_sector = sector;
1292 bio->bi_end_io = bio_end_io_pagebuf; 1213 bio->bi_end_io = xfs_buf_bio_end_io;
1293 bio->bi_private = pb; 1214 bio->bi_private = bp;
1294 1215
1295 for (; size && nr_pages; nr_pages--, map_i++) { 1216 for (; size && nr_pages; nr_pages--, map_i++) {
1296 int nbytes = PAGE_CACHE_SIZE - offset; 1217 int rbytes, nbytes = PAGE_CACHE_SIZE - offset;
1297 1218
1298 if (nbytes > size) 1219 if (nbytes > size)
1299 nbytes = size; 1220 nbytes = size;
1300 1221
1301 if (bio_add_page(bio, pb->pb_pages[map_i], 1222 rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
1302 nbytes, offset) < nbytes) 1223 if (rbytes < nbytes)
1303 break; 1224 break;
1304 1225
1305 offset = 0; 1226 offset = 0;
@@ -1315,107 +1236,102 @@ submit_io:
1315 goto next_chunk; 1236 goto next_chunk;
1316 } else { 1237 } else {
1317 bio_put(bio); 1238 bio_put(bio);
1318 pagebuf_ioerror(pb, EIO); 1239 xfs_buf_ioerror(bp, EIO);
1319 } 1240 }
1320} 1241}
1321 1242
1322/*
1323 * pagebuf_iorequest -- the core I/O request routine.
1324 */
1325int 1243int
1326pagebuf_iorequest( /* start real I/O */ 1244xfs_buf_iorequest(
1327 xfs_buf_t *pb) /* buffer to convey to device */ 1245 xfs_buf_t *bp)
1328{ 1246{
1329 PB_TRACE(pb, "iorequest", 0); 1247 XB_TRACE(bp, "iorequest", 0);
1330 1248
1331 if (pb->pb_flags & PBF_DELWRI) { 1249 if (bp->b_flags & XBF_DELWRI) {
1332 pagebuf_delwri_queue(pb, 1); 1250 xfs_buf_delwri_queue(bp, 1);
1333 return 0; 1251 return 0;
1334 } 1252 }
1335 1253
1336 if (pb->pb_flags & PBF_WRITE) { 1254 if (bp->b_flags & XBF_WRITE) {
1337 _pagebuf_wait_unpin(pb); 1255 xfs_buf_wait_unpin(bp);
1338 } 1256 }
1339 1257
1340 pagebuf_hold(pb); 1258 xfs_buf_hold(bp);
1341 1259
1342 /* Set the count to 1 initially, this will stop an I/O 1260 /* Set the count to 1 initially, this will stop an I/O
1343 * completion callout which happens before we have started 1261 * completion callout which happens before we have started
1344 * all the I/O from calling pagebuf_iodone too early. 1262 * all the I/O from calling xfs_buf_ioend too early.
1345 */ 1263 */
1346 atomic_set(&pb->pb_io_remaining, 1); 1264 atomic_set(&bp->b_io_remaining, 1);
1347 _pagebuf_ioapply(pb); 1265 _xfs_buf_ioapply(bp);
1348 _pagebuf_iodone(pb, 0); 1266 _xfs_buf_ioend(bp, 0);
1349 1267
1350 pagebuf_rele(pb); 1268 xfs_buf_rele(bp);
1351 return 0; 1269 return 0;
1352} 1270}
1353 1271
1354/* 1272/*
1355 * pagebuf_iowait 1273 * Waits for I/O to complete on the buffer supplied.
1356 * 1274 * It returns immediately if no I/O is pending.
1357 * pagebuf_iowait waits for I/O to complete on the buffer supplied. 1275 * It returns the I/O error code, if any, or 0 if there was no error.
1358 * It returns immediately if no I/O is pending. In any case, it returns
1359 * the error code, if any, or 0 if there is no error.
1360 */ 1276 */
1361int 1277int
1362pagebuf_iowait( 1278xfs_buf_iowait(
1363 xfs_buf_t *pb) 1279 xfs_buf_t *bp)
1364{ 1280{
1365 PB_TRACE(pb, "iowait", 0); 1281 XB_TRACE(bp, "iowait", 0);
1366 if (atomic_read(&pb->pb_io_remaining)) 1282 if (atomic_read(&bp->b_io_remaining))
1367 blk_run_address_space(pb->pb_target->pbr_mapping); 1283 blk_run_address_space(bp->b_target->bt_mapping);
1368 down(&pb->pb_iodonesema); 1284 down(&bp->b_iodonesema);
1369 PB_TRACE(pb, "iowaited", (long)pb->pb_error); 1285 XB_TRACE(bp, "iowaited", (long)bp->b_error);
1370 return pb->pb_error; 1286 return bp->b_error;
1371} 1287}
1372 1288
1373caddr_t 1289xfs_caddr_t
1374pagebuf_offset( 1290xfs_buf_offset(
1375 xfs_buf_t *pb, 1291 xfs_buf_t *bp,
1376 size_t offset) 1292 size_t offset)
1377{ 1293{
1378 struct page *page; 1294 struct page *page;
1379 1295
1380 offset += pb->pb_offset; 1296 if (bp->b_flags & XBF_MAPPED)
1297 return XFS_BUF_PTR(bp) + offset;
1381 1298
1382 page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; 1299 offset += bp->b_offset;
1383 return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); 1300 page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
1301 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
1384} 1302}
1385 1303
1386/* 1304/*
1387 * pagebuf_iomove
1388 *
1389 * Move data into or out of a buffer. 1305 * Move data into or out of a buffer.
1390 */ 1306 */
1391void 1307void
1392pagebuf_iomove( 1308xfs_buf_iomove(
1393 xfs_buf_t *pb, /* buffer to process */ 1309 xfs_buf_t *bp, /* buffer to process */
1394 size_t boff, /* starting buffer offset */ 1310 size_t boff, /* starting buffer offset */
1395 size_t bsize, /* length to copy */ 1311 size_t bsize, /* length to copy */
1396 caddr_t data, /* data address */ 1312 caddr_t data, /* data address */
1397 page_buf_rw_t mode) /* read/write flag */ 1313 xfs_buf_rw_t mode) /* read/write/zero flag */
1398{ 1314{
1399 size_t bend, cpoff, csize; 1315 size_t bend, cpoff, csize;
1400 struct page *page; 1316 struct page *page;
1401 1317
1402 bend = boff + bsize; 1318 bend = boff + bsize;
1403 while (boff < bend) { 1319 while (boff < bend) {
1404 page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; 1320 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
1405 cpoff = page_buf_poff(boff + pb->pb_offset); 1321 cpoff = xfs_buf_poff(boff + bp->b_offset);
1406 csize = min_t(size_t, 1322 csize = min_t(size_t,
1407 PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); 1323 PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
1408 1324
1409 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); 1325 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
1410 1326
1411 switch (mode) { 1327 switch (mode) {
1412 case PBRW_ZERO: 1328 case XBRW_ZERO:
1413 memset(page_address(page) + cpoff, 0, csize); 1329 memset(page_address(page) + cpoff, 0, csize);
1414 break; 1330 break;
1415 case PBRW_READ: 1331 case XBRW_READ:
1416 memcpy(data, page_address(page) + cpoff, csize); 1332 memcpy(data, page_address(page) + cpoff, csize);
1417 break; 1333 break;
1418 case PBRW_WRITE: 1334 case XBRW_WRITE:
1419 memcpy(page_address(page) + cpoff, data, csize); 1335 memcpy(page_address(page) + cpoff, data, csize);
1420 } 1336 }
1421 1337
@@ -1425,12 +1341,12 @@ pagebuf_iomove(
1425} 1341}
1426 1342
1427/* 1343/*
1428 * Handling of buftargs. 1344 * Handling of buffer targets (buftargs).
1429 */ 1345 */
1430 1346
1431/* 1347/*
1432 * Wait for any bufs with callbacks that have been submitted but 1348 * Wait for any bufs with callbacks that have been submitted but
1433 * have not yet returned... walk the hash list for the target. 1349 * have not yet returned... walk the hash list for the target.
1434 */ 1350 */
1435void 1351void
1436xfs_wait_buftarg( 1352xfs_wait_buftarg(
@@ -1444,15 +1360,15 @@ xfs_wait_buftarg(
1444 hash = &btp->bt_hash[i]; 1360 hash = &btp->bt_hash[i];
1445again: 1361again:
1446 spin_lock(&hash->bh_lock); 1362 spin_lock(&hash->bh_lock);
1447 list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) { 1363 list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
1448 ASSERT(btp == bp->pb_target); 1364 ASSERT(btp == bp->b_target);
1449 if (!(bp->pb_flags & PBF_FS_MANAGED)) { 1365 if (!(bp->b_flags & XBF_FS_MANAGED)) {
1450 spin_unlock(&hash->bh_lock); 1366 spin_unlock(&hash->bh_lock);
1451 /* 1367 /*
1452 * Catch superblock reference count leaks 1368 * Catch superblock reference count leaks
1453 * immediately 1369 * immediately
1454 */ 1370 */
1455 BUG_ON(bp->pb_bn == 0); 1371 BUG_ON(bp->b_bn == 0);
1456 delay(100); 1372 delay(100);
1457 goto again; 1373 goto again;
1458 } 1374 }
@@ -1462,9 +1378,9 @@ again:
1462} 1378}
1463 1379
1464/* 1380/*
1465 * Allocate buffer hash table for a given target. 1381 * Allocate buffer hash table for a given target.
1466 * For devices containing metadata (i.e. not the log/realtime devices) 1382 * For devices containing metadata (i.e. not the log/realtime devices)
1467 * we need to allocate a much larger hash table. 1383 * we need to allocate a much larger hash table.
1468 */ 1384 */
1469STATIC void 1385STATIC void
1470xfs_alloc_bufhash( 1386xfs_alloc_bufhash(
@@ -1487,11 +1403,34 @@ STATIC void
1487xfs_free_bufhash( 1403xfs_free_bufhash(
1488 xfs_buftarg_t *btp) 1404 xfs_buftarg_t *btp)
1489{ 1405{
1490 kmem_free(btp->bt_hash, 1406 kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1491 (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
1492 btp->bt_hash = NULL; 1407 btp->bt_hash = NULL;
1493} 1408}
1494 1409
1410/*
1411 * buftarg list for delwrite queue processing
1412 */
1413STATIC LIST_HEAD(xfs_buftarg_list);
1414STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
1415
1416STATIC void
1417xfs_register_buftarg(
1418 xfs_buftarg_t *btp)
1419{
1420 spin_lock(&xfs_buftarg_lock);
1421 list_add(&btp->bt_list, &xfs_buftarg_list);
1422 spin_unlock(&xfs_buftarg_lock);
1423}
1424
1425STATIC void
1426xfs_unregister_buftarg(
1427 xfs_buftarg_t *btp)
1428{
1429 spin_lock(&xfs_buftarg_lock);
1430 list_del(&btp->bt_list);
1431 spin_unlock(&xfs_buftarg_lock);
1432}
1433
1495void 1434void
1496xfs_free_buftarg( 1435xfs_free_buftarg(
1497 xfs_buftarg_t *btp, 1436 xfs_buftarg_t *btp,
@@ -1499,9 +1438,16 @@ xfs_free_buftarg(
1499{ 1438{
1500 xfs_flush_buftarg(btp, 1); 1439 xfs_flush_buftarg(btp, 1);
1501 if (external) 1440 if (external)
1502 xfs_blkdev_put(btp->pbr_bdev); 1441 xfs_blkdev_put(btp->bt_bdev);
1503 xfs_free_bufhash(btp); 1442 xfs_free_bufhash(btp);
1504 iput(btp->pbr_mapping->host); 1443 iput(btp->bt_mapping->host);
1444
1445 /* Unregister the buftarg first so that we don't get a
1446 * wakeup finding a non-existent task
1447 */
1448 xfs_unregister_buftarg(btp);
1449 kthread_stop(btp->bt_task);
1450
1505 kmem_free(btp, sizeof(*btp)); 1451 kmem_free(btp, sizeof(*btp));
1506} 1452}
1507 1453
@@ -1512,11 +1458,11 @@ xfs_setsize_buftarg_flags(
1512 unsigned int sectorsize, 1458 unsigned int sectorsize,
1513 int verbose) 1459 int verbose)
1514{ 1460{
1515 btp->pbr_bsize = blocksize; 1461 btp->bt_bsize = blocksize;
1516 btp->pbr_sshift = ffs(sectorsize) - 1; 1462 btp->bt_sshift = ffs(sectorsize) - 1;
1517 btp->pbr_smask = sectorsize - 1; 1463 btp->bt_smask = sectorsize - 1;
1518 1464
1519 if (set_blocksize(btp->pbr_bdev, sectorsize)) { 1465 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1520 printk(KERN_WARNING 1466 printk(KERN_WARNING
1521 "XFS: Cannot set_blocksize to %u on device %s\n", 1467 "XFS: Cannot set_blocksize to %u on device %s\n",
1522 sectorsize, XFS_BUFTARG_NAME(btp)); 1468 sectorsize, XFS_BUFTARG_NAME(btp));
@@ -1536,10 +1482,10 @@ xfs_setsize_buftarg_flags(
1536} 1482}
1537 1483
1538/* 1484/*
1539* When allocating the initial buffer target we have not yet 1485 * When allocating the initial buffer target we have not yet
1540* read in the superblock, so don't know what sized sectors 1486 * read in the superblock, so don't know what sized sectors
1541* are being used is at this early stage. Play safe. 1487 * are being used is at this early stage. Play safe.
1542*/ 1488 */
1543STATIC int 1489STATIC int
1544xfs_setsize_buftarg_early( 1490xfs_setsize_buftarg_early(
1545 xfs_buftarg_t *btp, 1491 xfs_buftarg_t *btp,
@@ -1587,10 +1533,30 @@ xfs_mapping_buftarg(
1587 mapping->a_ops = &mapping_aops; 1533 mapping->a_ops = &mapping_aops;
1588 mapping->backing_dev_info = bdi; 1534 mapping->backing_dev_info = bdi;
1589 mapping_set_gfp_mask(mapping, GFP_NOFS); 1535 mapping_set_gfp_mask(mapping, GFP_NOFS);
1590 btp->pbr_mapping = mapping; 1536 btp->bt_mapping = mapping;
1591 return 0; 1537 return 0;
1592} 1538}
1593 1539
1540STATIC int
1541xfs_alloc_delwrite_queue(
1542 xfs_buftarg_t *btp)
1543{
1544 int error = 0;
1545
1546 INIT_LIST_HEAD(&btp->bt_list);
1547 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1548 spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
1549 btp->bt_flags = 0;
1550 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
1551 if (IS_ERR(btp->bt_task)) {
1552 error = PTR_ERR(btp->bt_task);
1553 goto out_error;
1554 }
1555 xfs_register_buftarg(btp);
1556out_error:
1557 return error;
1558}
1559
1594xfs_buftarg_t * 1560xfs_buftarg_t *
1595xfs_alloc_buftarg( 1561xfs_alloc_buftarg(
1596 struct block_device *bdev, 1562 struct block_device *bdev,
@@ -1600,12 +1566,14 @@ xfs_alloc_buftarg(
1600 1566
1601 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); 1567 btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
1602 1568
1603 btp->pbr_dev = bdev->bd_dev; 1569 btp->bt_dev = bdev->bd_dev;
1604 btp->pbr_bdev = bdev; 1570 btp->bt_bdev = bdev;
1605 if (xfs_setsize_buftarg_early(btp, bdev)) 1571 if (xfs_setsize_buftarg_early(btp, bdev))
1606 goto error; 1572 goto error;
1607 if (xfs_mapping_buftarg(btp, bdev)) 1573 if (xfs_mapping_buftarg(btp, bdev))
1608 goto error; 1574 goto error;
1575 if (xfs_alloc_delwrite_queue(btp))
1576 goto error;
1609 xfs_alloc_bufhash(btp, external); 1577 xfs_alloc_bufhash(btp, external);
1610 return btp; 1578 return btp;
1611 1579
@@ -1616,83 +1584,81 @@ error:
1616 1584
1617 1585
1618/* 1586/*
1619 * Pagebuf delayed write buffer handling 1587 * Delayed write buffer handling
1620 */ 1588 */
1621
1622STATIC LIST_HEAD(pbd_delwrite_queue);
1623STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
1624
1625STATIC void 1589STATIC void
1626pagebuf_delwri_queue( 1590xfs_buf_delwri_queue(
1627 xfs_buf_t *pb, 1591 xfs_buf_t *bp,
1628 int unlock) 1592 int unlock)
1629{ 1593{
1630 PB_TRACE(pb, "delwri_q", (long)unlock); 1594 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1631 ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == 1595 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1632 (PBF_DELWRI|PBF_ASYNC)); 1596
1597 XB_TRACE(bp, "delwri_q", (long)unlock);
1598 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1633 1599
1634 spin_lock(&pbd_delwrite_lock); 1600 spin_lock(dwlk);
1635 /* If already in the queue, dequeue and place at tail */ 1601 /* If already in the queue, dequeue and place at tail */
1636 if (!list_empty(&pb->pb_list)) { 1602 if (!list_empty(&bp->b_list)) {
1637 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1603 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1638 if (unlock) { 1604 if (unlock)
1639 atomic_dec(&pb->pb_hold); 1605 atomic_dec(&bp->b_hold);
1640 } 1606 list_del(&bp->b_list);
1641 list_del(&pb->pb_list);
1642 } 1607 }
1643 1608
1644 pb->pb_flags |= _PBF_DELWRI_Q; 1609 bp->b_flags |= _XBF_DELWRI_Q;
1645 list_add_tail(&pb->pb_list, &pbd_delwrite_queue); 1610 list_add_tail(&bp->b_list, dwq);
1646 pb->pb_queuetime = jiffies; 1611 bp->b_queuetime = jiffies;
1647 spin_unlock(&pbd_delwrite_lock); 1612 spin_unlock(dwlk);
1648 1613
1649 if (unlock) 1614 if (unlock)
1650 pagebuf_unlock(pb); 1615 xfs_buf_unlock(bp);
1651} 1616}
1652 1617
1653void 1618void
1654pagebuf_delwri_dequeue( 1619xfs_buf_delwri_dequeue(
1655 xfs_buf_t *pb) 1620 xfs_buf_t *bp)
1656{ 1621{
1622 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1657 int dequeued = 0; 1623 int dequeued = 0;
1658 1624
1659 spin_lock(&pbd_delwrite_lock); 1625 spin_lock(dwlk);
1660 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { 1626 if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
1661 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1627 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1662 list_del_init(&pb->pb_list); 1628 list_del_init(&bp->b_list);
1663 dequeued = 1; 1629 dequeued = 1;
1664 } 1630 }
1665 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1631 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1666 spin_unlock(&pbd_delwrite_lock); 1632 spin_unlock(dwlk);
1667 1633
1668 if (dequeued) 1634 if (dequeued)
1669 pagebuf_rele(pb); 1635 xfs_buf_rele(bp);
1670 1636
1671 PB_TRACE(pb, "delwri_dq", (long)dequeued); 1637 XB_TRACE(bp, "delwri_dq", (long)dequeued);
1672} 1638}
1673 1639
1674STATIC void 1640STATIC void
1675pagebuf_runall_queues( 1641xfs_buf_runall_queues(
1676 struct workqueue_struct *queue) 1642 struct workqueue_struct *queue)
1677{ 1643{
1678 flush_workqueue(queue); 1644 flush_workqueue(queue);
1679} 1645}
1680 1646
1681/* Defines for pagebuf daemon */
1682STATIC struct task_struct *xfsbufd_task;
1683STATIC int xfsbufd_force_flush;
1684STATIC int xfsbufd_force_sleep;
1685
1686STATIC int 1647STATIC int
1687xfsbufd_wakeup( 1648xfsbufd_wakeup(
1688 int priority, 1649 int priority,
1689 gfp_t mask) 1650 gfp_t mask)
1690{ 1651{
1691 if (xfsbufd_force_sleep) 1652 xfs_buftarg_t *btp;
1692 return 0; 1653
1693 xfsbufd_force_flush = 1; 1654 spin_lock(&xfs_buftarg_lock);
1694 barrier(); 1655 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1695 wake_up_process(xfsbufd_task); 1656 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1657 continue;
1658 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1659 wake_up_process(btp->bt_task);
1660 }
1661 spin_unlock(&xfs_buftarg_lock);
1696 return 0; 1662 return 0;
1697} 1663}
1698 1664
@@ -1702,67 +1668,70 @@ xfsbufd(
1702{ 1668{
1703 struct list_head tmp; 1669 struct list_head tmp;
1704 unsigned long age; 1670 unsigned long age;
1705 xfs_buftarg_t *target; 1671 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1706 xfs_buf_t *pb, *n; 1672 xfs_buf_t *bp, *n;
1673 struct list_head *dwq = &target->bt_delwrite_queue;
1674 spinlock_t *dwlk = &target->bt_delwrite_lock;
1707 1675
1708 current->flags |= PF_MEMALLOC; 1676 current->flags |= PF_MEMALLOC;
1709 1677
1710 INIT_LIST_HEAD(&tmp); 1678 INIT_LIST_HEAD(&tmp);
1711 do { 1679 do {
1712 if (unlikely(freezing(current))) { 1680 if (unlikely(freezing(current))) {
1713 xfsbufd_force_sleep = 1; 1681 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1714 refrigerator(); 1682 refrigerator();
1715 } else { 1683 } else {
1716 xfsbufd_force_sleep = 0; 1684 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1717 } 1685 }
1718 1686
1719 schedule_timeout_interruptible( 1687 schedule_timeout_interruptible(
1720 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1688 xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1721 1689
1722 age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1690 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1723 spin_lock(&pbd_delwrite_lock); 1691 spin_lock(dwlk);
1724 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1692 list_for_each_entry_safe(bp, n, dwq, b_list) {
1725 PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); 1693 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
1726 ASSERT(pb->pb_flags & PBF_DELWRI); 1694 ASSERT(bp->b_flags & XBF_DELWRI);
1727 1695
1728 if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { 1696 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
1729 if (!xfsbufd_force_flush && 1697 if (!test_bit(XBT_FORCE_FLUSH,
1698 &target->bt_flags) &&
1730 time_before(jiffies, 1699 time_before(jiffies,
1731 pb->pb_queuetime + age)) { 1700 bp->b_queuetime + age)) {
1732 pagebuf_unlock(pb); 1701 xfs_buf_unlock(bp);
1733 break; 1702 break;
1734 } 1703 }
1735 1704
1736 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1705 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1737 pb->pb_flags |= PBF_WRITE; 1706 bp->b_flags |= XBF_WRITE;
1738 list_move(&pb->pb_list, &tmp); 1707 list_move(&bp->b_list, &tmp);
1739 } 1708 }
1740 } 1709 }
1741 spin_unlock(&pbd_delwrite_lock); 1710 spin_unlock(dwlk);
1742 1711
1743 while (!list_empty(&tmp)) { 1712 while (!list_empty(&tmp)) {
1744 pb = list_entry(tmp.next, xfs_buf_t, pb_list); 1713 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1745 target = pb->pb_target; 1714 ASSERT(target == bp->b_target);
1746 1715
1747 list_del_init(&pb->pb_list); 1716 list_del_init(&bp->b_list);
1748 pagebuf_iostrategy(pb); 1717 xfs_buf_iostrategy(bp);
1749 1718
1750 blk_run_address_space(target->pbr_mapping); 1719 blk_run_address_space(target->bt_mapping);
1751 } 1720 }
1752 1721
1753 if (as_list_len > 0) 1722 if (as_list_len > 0)
1754 purge_addresses(); 1723 purge_addresses();
1755 1724
1756 xfsbufd_force_flush = 0; 1725 clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1757 } while (!kthread_should_stop()); 1726 } while (!kthread_should_stop());
1758 1727
1759 return 0; 1728 return 0;
1760} 1729}
1761 1730
1762/* 1731/*
1763 * Go through all incore buffers, and release buffers if they belong to 1732 * Go through all incore buffers, and release buffers if they belong to
1764 * the given device. This is used in filesystem error handling to 1733 * the given device. This is used in filesystem error handling to
1765 * preserve the consistency of its metadata. 1734 * preserve the consistency of its metadata.
1766 */ 1735 */
1767int 1736int
1768xfs_flush_buftarg( 1737xfs_flush_buftarg(
@@ -1770,73 +1739,72 @@ xfs_flush_buftarg(
1770 int wait) 1739 int wait)
1771{ 1740{
1772 struct list_head tmp; 1741 struct list_head tmp;
1773 xfs_buf_t *pb, *n; 1742 xfs_buf_t *bp, *n;
1774 int pincount = 0; 1743 int pincount = 0;
1744 struct list_head *dwq = &target->bt_delwrite_queue;
1745 spinlock_t *dwlk = &target->bt_delwrite_lock;
1775 1746
1776 pagebuf_runall_queues(xfsdatad_workqueue); 1747 xfs_buf_runall_queues(xfsdatad_workqueue);
1777 pagebuf_runall_queues(xfslogd_workqueue); 1748 xfs_buf_runall_queues(xfslogd_workqueue);
1778 1749
1779 INIT_LIST_HEAD(&tmp); 1750 INIT_LIST_HEAD(&tmp);
1780 spin_lock(&pbd_delwrite_lock); 1751 spin_lock(dwlk);
1781 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1752 list_for_each_entry_safe(bp, n, dwq, b_list) {
1782 1753 ASSERT(bp->b_target == target);
1783 if (pb->pb_target != target) 1754 ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
1784 continue; 1755 XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
1785 1756 if (xfs_buf_ispin(bp)) {
1786 ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
1787 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
1788 if (pagebuf_ispin(pb)) {
1789 pincount++; 1757 pincount++;
1790 continue; 1758 continue;
1791 } 1759 }
1792 1760
1793 list_move(&pb->pb_list, &tmp); 1761 list_move(&bp->b_list, &tmp);
1794 } 1762 }
1795 spin_unlock(&pbd_delwrite_lock); 1763 spin_unlock(dwlk);
1796 1764
1797 /* 1765 /*
1798 * Dropped the delayed write list lock, now walk the temporary list 1766 * Dropped the delayed write list lock, now walk the temporary list
1799 */ 1767 */
1800 list_for_each_entry_safe(pb, n, &tmp, pb_list) { 1768 list_for_each_entry_safe(bp, n, &tmp, b_list) {
1801 pagebuf_lock(pb); 1769 xfs_buf_lock(bp);
1802 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1770 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
1803 pb->pb_flags |= PBF_WRITE; 1771 bp->b_flags |= XBF_WRITE;
1804 if (wait) 1772 if (wait)
1805 pb->pb_flags &= ~PBF_ASYNC; 1773 bp->b_flags &= ~XBF_ASYNC;
1806 else 1774 else
1807 list_del_init(&pb->pb_list); 1775 list_del_init(&bp->b_list);
1808 1776
1809 pagebuf_iostrategy(pb); 1777 xfs_buf_iostrategy(bp);
1810 } 1778 }
1811 1779
1812 /* 1780 /*
1813 * Remaining list items must be flushed before returning 1781 * Remaining list items must be flushed before returning
1814 */ 1782 */
1815 while (!list_empty(&tmp)) { 1783 while (!list_empty(&tmp)) {
1816 pb = list_entry(tmp.next, xfs_buf_t, pb_list); 1784 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1817 1785
1818 list_del_init(&pb->pb_list); 1786 list_del_init(&bp->b_list);
1819 xfs_iowait(pb); 1787 xfs_iowait(bp);
1820 xfs_buf_relse(pb); 1788 xfs_buf_relse(bp);
1821 } 1789 }
1822 1790
1823 if (wait) 1791 if (wait)
1824 blk_run_address_space(target->pbr_mapping); 1792 blk_run_address_space(target->bt_mapping);
1825 1793
1826 return pincount; 1794 return pincount;
1827} 1795}
1828 1796
1829int __init 1797int __init
1830pagebuf_init(void) 1798xfs_buf_init(void)
1831{ 1799{
1832 int error = -ENOMEM; 1800 int error = -ENOMEM;
1833 1801
1834#ifdef PAGEBUF_TRACE 1802#ifdef XFS_BUF_TRACE
1835 pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); 1803 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
1836#endif 1804#endif
1837 1805
1838 pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf"); 1806 xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
1839 if (!pagebuf_zone) 1807 if (!xfs_buf_zone)
1840 goto out_free_trace_buf; 1808 goto out_free_trace_buf;
1841 1809
1842 xfslogd_workqueue = create_workqueue("xfslogd"); 1810 xfslogd_workqueue = create_workqueue("xfslogd");
@@ -1847,42 +1815,33 @@ pagebuf_init(void)
1847 if (!xfsdatad_workqueue) 1815 if (!xfsdatad_workqueue)
1848 goto out_destroy_xfslogd_workqueue; 1816 goto out_destroy_xfslogd_workqueue;
1849 1817
1850 xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); 1818 xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
1851 if (IS_ERR(xfsbufd_task)) { 1819 if (!xfs_buf_shake)
1852 error = PTR_ERR(xfsbufd_task);
1853 goto out_destroy_xfsdatad_workqueue; 1820 goto out_destroy_xfsdatad_workqueue;
1854 }
1855
1856 pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
1857 if (!pagebuf_shake)
1858 goto out_stop_xfsbufd;
1859 1821
1860 return 0; 1822 return 0;
1861 1823
1862 out_stop_xfsbufd:
1863 kthread_stop(xfsbufd_task);
1864 out_destroy_xfsdatad_workqueue: 1824 out_destroy_xfsdatad_workqueue:
1865 destroy_workqueue(xfsdatad_workqueue); 1825 destroy_workqueue(xfsdatad_workqueue);
1866 out_destroy_xfslogd_workqueue: 1826 out_destroy_xfslogd_workqueue:
1867 destroy_workqueue(xfslogd_workqueue); 1827 destroy_workqueue(xfslogd_workqueue);
1868 out_free_buf_zone: 1828 out_free_buf_zone:
1869 kmem_zone_destroy(pagebuf_zone); 1829 kmem_zone_destroy(xfs_buf_zone);
1870 out_free_trace_buf: 1830 out_free_trace_buf:
1871#ifdef PAGEBUF_TRACE 1831#ifdef XFS_BUF_TRACE
1872 ktrace_free(pagebuf_trace_buf); 1832 ktrace_free(xfs_buf_trace_buf);
1873#endif 1833#endif
1874 return error; 1834 return error;
1875} 1835}
1876 1836
1877void 1837void
1878pagebuf_terminate(void) 1838xfs_buf_terminate(void)
1879{ 1839{
1880 kmem_shake_deregister(pagebuf_shake); 1840 kmem_shake_deregister(xfs_buf_shake);
1881 kthread_stop(xfsbufd_task);
1882 destroy_workqueue(xfsdatad_workqueue); 1841 destroy_workqueue(xfsdatad_workqueue);
1883 destroy_workqueue(xfslogd_workqueue); 1842 destroy_workqueue(xfslogd_workqueue);
1884 kmem_zone_destroy(pagebuf_zone); 1843 kmem_zone_destroy(xfs_buf_zone);
1885#ifdef PAGEBUF_TRACE 1844#ifdef XFS_BUF_TRACE
1886 ktrace_free(pagebuf_trace_buf); 1845 ktrace_free(xfs_buf_trace_buf);
1887#endif 1846#endif
1888} 1847}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 237a35b915d1..4dd6592d5a4c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -32,44 +32,47 @@
32 * Base types 32 * Base types
33 */ 33 */
34 34
35#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) 35#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
36 36
37#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) 37#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
38#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) 38#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
39#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) 39#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
40#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) 40#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
41 41
42typedef enum page_buf_rw_e { 42typedef enum {
43 PBRW_READ = 1, /* transfer into target memory */ 43 XBRW_READ = 1, /* transfer into target memory */
44 PBRW_WRITE = 2, /* transfer from target memory */ 44 XBRW_WRITE = 2, /* transfer from target memory */
45 PBRW_ZERO = 3 /* Zero target memory */ 45 XBRW_ZERO = 3, /* Zero target memory */
46} page_buf_rw_t; 46} xfs_buf_rw_t;
47 47
48 48typedef enum {
49typedef enum page_buf_flags_e { /* pb_flags values */ 49 XBF_READ = (1 << 0), /* buffer intended for reading from device */
50 PBF_READ = (1 << 0), /* buffer intended for reading from device */ 50 XBF_WRITE = (1 << 1), /* buffer intended for writing to device */
51 PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ 51 XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */
52 PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ 52 XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
53 PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ 53 XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */
54 PBF_DONE = (1 << 5), /* all pages in the buffer uptodate */ 54 XBF_DELWRI = (1 << 6), /* buffer has dirty pages */
55 PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ 55 XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
56 PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ 56 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
57 PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 57 XBF_ORDERED = (1 << 11), /* use ordered writes */
58 PBF_ORDERED = (1 << 11), /* use ordered writes */ 58 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
59 PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
60 59
61 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
62 PBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
63 PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ 62 XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
64 PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ 63 XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
65 64
66 /* flags used only internally */ 65 /* flags used only internally */
67 _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 66 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
68 _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ 67 _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
69 _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 68 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
70 _PBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 69 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
71} page_buf_flags_t; 70} xfs_buf_flags_t;
72 71
72typedef enum {
73 XBT_FORCE_SLEEP = (0 << 1),
74 XBT_FORCE_FLUSH = (1 << 1),
75} xfs_buftarg_flags_t;
73 76
74typedef struct xfs_bufhash { 77typedef struct xfs_bufhash {
75 struct list_head bh_list; 78 struct list_head bh_list;
@@ -77,477 +80,350 @@ typedef struct xfs_bufhash {
77} xfs_bufhash_t; 80} xfs_bufhash_t;
78 81
79typedef struct xfs_buftarg { 82typedef struct xfs_buftarg {
80 dev_t pbr_dev; 83 dev_t bt_dev;
81 struct block_device *pbr_bdev; 84 struct block_device *bt_bdev;
82 struct address_space *pbr_mapping; 85 struct address_space *bt_mapping;
83 unsigned int pbr_bsize; 86 unsigned int bt_bsize;
84 unsigned int pbr_sshift; 87 unsigned int bt_sshift;
85 size_t pbr_smask; 88 size_t bt_smask;
86 89
87 /* per-device buffer hash table */ 90 /* per device buffer hash table */
88 uint bt_hashmask; 91 uint bt_hashmask;
89 uint bt_hashshift; 92 uint bt_hashshift;
90 xfs_bufhash_t *bt_hash; 93 xfs_bufhash_t *bt_hash;
94
95 /* per device delwri queue */
96 struct task_struct *bt_task;
97 struct list_head bt_list;
98 struct list_head bt_delwrite_queue;
99 spinlock_t bt_delwrite_lock;
100 unsigned long bt_flags;
91} xfs_buftarg_t; 101} xfs_buftarg_t;
92 102
93/* 103/*
94 * xfs_buf_t: Buffer structure for page cache-based buffers 104 * xfs_buf_t: Buffer structure for pagecache-based buffers
105 *
106 * This buffer structure is used by the pagecache buffer management routines
107 * to refer to an assembly of pages forming a logical buffer.
95 * 108 *
96 * This buffer structure is used by the page cache buffer management routines 109 * The buffer structure is used on a temporary basis only, and discarded when
97 * to refer to an assembly of pages forming a logical buffer. The actual I/O 110 * released. The real data storage is recorded in the pagecache. Buffers are
98 * is performed with buffer_head structures, as required by drivers.
99 *
100 * The buffer structure is used on temporary basis only, and discarded when
101 * released. The real data storage is recorded in the page cache. Metadata is
102 * hashed to the block device on which the file system resides. 111 * hashed to the block device on which the file system resides.
103 */ 112 */
104 113
105struct xfs_buf; 114struct xfs_buf;
115typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
116typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
117typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
106 118
107/* call-back function on I/O completion */ 119#define XB_PAGES 2
108typedef void (*page_buf_iodone_t)(struct xfs_buf *);
109/* call-back function on I/O completion */
110typedef void (*page_buf_relse_t)(struct xfs_buf *);
111/* pre-write function */
112typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
113
114#define PB_PAGES 2
115 120
116typedef struct xfs_buf { 121typedef struct xfs_buf {
117 struct semaphore pb_sema; /* semaphore for lockables */ 122 struct semaphore b_sema; /* semaphore for lockables */
118 unsigned long pb_queuetime; /* time buffer was queued */ 123 unsigned long b_queuetime; /* time buffer was queued */
119 atomic_t pb_pin_count; /* pin count */ 124 atomic_t b_pin_count; /* pin count */
120 wait_queue_head_t pb_waiters; /* unpin waiters */ 125 wait_queue_head_t b_waiters; /* unpin waiters */
121 struct list_head pb_list; 126 struct list_head b_list;
122 page_buf_flags_t pb_flags; /* status flags */ 127 xfs_buf_flags_t b_flags; /* status flags */
123 struct list_head pb_hash_list; /* hash table list */ 128 struct list_head b_hash_list; /* hash table list */
124 xfs_bufhash_t *pb_hash; /* hash table list start */ 129 xfs_bufhash_t *b_hash; /* hash table list start */
125 xfs_buftarg_t *pb_target; /* buffer target (device) */ 130 xfs_buftarg_t *b_target; /* buffer target (device) */
126 atomic_t pb_hold; /* reference count */ 131 atomic_t b_hold; /* reference count */
127 xfs_daddr_t pb_bn; /* block number for I/O */ 132 xfs_daddr_t b_bn; /* block number for I/O */
128 loff_t pb_file_offset; /* offset in file */ 133 xfs_off_t b_file_offset; /* offset in file */
129 size_t pb_buffer_length; /* size of buffer in bytes */ 134 size_t b_buffer_length;/* size of buffer in bytes */
130 size_t pb_count_desired; /* desired transfer size */ 135 size_t b_count_desired;/* desired transfer size */
131 void *pb_addr; /* virtual address of buffer */ 136 void *b_addr; /* virtual address of buffer */
132 struct work_struct pb_iodone_work; 137 struct work_struct b_iodone_work;
133 atomic_t pb_io_remaining;/* #outstanding I/O requests */ 138 atomic_t b_io_remaining; /* #outstanding I/O requests */
134 page_buf_iodone_t pb_iodone; /* I/O completion function */ 139 xfs_buf_iodone_t b_iodone; /* I/O completion function */
135 page_buf_relse_t pb_relse; /* releasing function */ 140 xfs_buf_relse_t b_relse; /* releasing function */
136 page_buf_bdstrat_t pb_strat; /* pre-write function */ 141 xfs_buf_bdstrat_t b_strat; /* pre-write function */
137 struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ 142 struct semaphore b_iodonesema; /* Semaphore for I/O waiters */
138 void *pb_fspriv; 143 void *b_fspriv;
139 void *pb_fspriv2; 144 void *b_fspriv2;
140 void *pb_fspriv3; 145 void *b_fspriv3;
141 unsigned short pb_error; /* error code on I/O */ 146 unsigned short b_error; /* error code on I/O */
142 unsigned short pb_locked; /* page array is locked */ 147 unsigned short b_locked; /* page array is locked */
143 unsigned int pb_page_count; /* size of page array */ 148 unsigned int b_page_count; /* size of page array */
144 unsigned int pb_offset; /* page offset in first page */ 149 unsigned int b_offset; /* page offset in first page */
145 struct page **pb_pages; /* array of page pointers */ 150 struct page **b_pages; /* array of page pointers */
146 struct page *pb_page_array[PB_PAGES]; /* inline pages */ 151 struct page *b_page_array[XB_PAGES]; /* inline pages */
147#ifdef PAGEBUF_LOCK_TRACKING 152#ifdef XFS_BUF_LOCK_TRACKING
148 int pb_last_holder; 153 int b_last_holder;
149#endif 154#endif
150} xfs_buf_t; 155} xfs_buf_t;
151 156
152 157
153/* Finding and Reading Buffers */ 158/* Finding and Reading Buffers */
154 159extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
155extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */ 160 xfs_buf_flags_t, xfs_buf_t *);
156 /* the block is in memory */
157 xfs_buftarg_t *, /* inode for block */
158 loff_t, /* starting offset of range */
159 size_t, /* length of range */
160 page_buf_flags_t, /* PBF_LOCK */
161 xfs_buf_t *); /* newly allocated buffer */
162
163#define xfs_incore(buftarg,blkno,len,lockit) \ 161#define xfs_incore(buftarg,blkno,len,lockit) \
164 _pagebuf_find(buftarg, blkno ,len, lockit, NULL) 162 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
165
166extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */
167 xfs_buftarg_t *, /* inode for buffer */
168 loff_t, /* starting offset of range */
169 size_t, /* length of range */
170 page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
171 /* PBF_ASYNC */
172 163
164extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
165 xfs_buf_flags_t);
173#define xfs_buf_get(target, blkno, len, flags) \ 166#define xfs_buf_get(target, blkno, len, flags) \
174 xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) 167 xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
175
176extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
177 xfs_buftarg_t *, /* inode for buffer */
178 loff_t, /* starting offset of range */
179 size_t, /* length of range */
180 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */
181 168
169extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
170 xfs_buf_flags_t);
182#define xfs_buf_read(target, blkno, len, flags) \ 171#define xfs_buf_read(target, blkno, len, flags) \
183 xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED) 172 xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
184
185extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
186 /* no memory or disk address */
187 size_t len,
188 xfs_buftarg_t *); /* mount point "fake" inode */
189
190extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
191 /* without disk address */
192 size_t len,
193 xfs_buftarg_t *); /* mount point "fake" inode */
194
195extern int pagebuf_associate_memory(
196 xfs_buf_t *,
197 void *,
198 size_t);
199
200extern void pagebuf_hold( /* increment reference count */
201 xfs_buf_t *); /* buffer to hold */
202 173
203extern void pagebuf_readahead( /* read ahead into cache */ 174extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
204 xfs_buftarg_t *, /* target for buffer (or NULL) */ 175extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
205 loff_t, /* starting offset of range */ 176extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
206 size_t, /* length of range */ 177extern void xfs_buf_hold(xfs_buf_t *);
207 page_buf_flags_t); /* additional read flags */ 178extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
179 xfs_buf_flags_t);
208 180
209/* Releasing Buffers */ 181/* Releasing Buffers */
210 182extern void xfs_buf_free(xfs_buf_t *);
211extern void pagebuf_free( /* deallocate a buffer */ 183extern void xfs_buf_rele(xfs_buf_t *);
212 xfs_buf_t *); /* buffer to deallocate */
213
214extern void pagebuf_rele( /* release hold on a buffer */
215 xfs_buf_t *); /* buffer to release */
216 184
217/* Locking and Unlocking Buffers */ 185/* Locking and Unlocking Buffers */
218 186extern int xfs_buf_cond_lock(xfs_buf_t *);
219extern int pagebuf_cond_lock( /* lock buffer, if not locked */ 187extern int xfs_buf_lock_value(xfs_buf_t *);
220 /* (returns -EBUSY if locked) */ 188extern void xfs_buf_lock(xfs_buf_t *);
221 xfs_buf_t *); /* buffer to lock */ 189extern void xfs_buf_unlock(xfs_buf_t *);
222
223extern int pagebuf_lock_value( /* return count on lock */
224 xfs_buf_t *); /* buffer to check */
225
226extern int pagebuf_lock( /* lock buffer */
227 xfs_buf_t *); /* buffer to lock */
228
229extern void pagebuf_unlock( /* unlock buffer */
230 xfs_buf_t *); /* buffer to unlock */
231 190
232/* Buffer Read and Write Routines */ 191/* Buffer Read and Write Routines */
233 192extern void xfs_buf_ioend(xfs_buf_t *, int);
234extern void pagebuf_iodone( /* mark buffer I/O complete */ 193extern void xfs_buf_ioerror(xfs_buf_t *, int);
235 xfs_buf_t *, /* buffer to mark */ 194extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
236 int); /* run completion locally, or in 195extern int xfs_buf_iorequest(xfs_buf_t *);
237 * a helper thread. */ 196extern int xfs_buf_iowait(xfs_buf_t *);
238 197extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
239extern void pagebuf_ioerror( /* mark buffer in error (or not) */ 198 xfs_buf_rw_t);
240 xfs_buf_t *, /* buffer to mark */ 199
241 int); /* error to store (0 if none) */ 200static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
242
243extern int pagebuf_iostart( /* start I/O on a buffer */
244 xfs_buf_t *, /* buffer to start */
245 page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */
246 /* PBF_READ, PBF_WRITE, */
247 /* PBF_DELWRI */
248
249extern int pagebuf_iorequest( /* start real I/O */
250 xfs_buf_t *); /* buffer to convey to device */
251
252extern int pagebuf_iowait( /* wait for buffer I/O done */
253 xfs_buf_t *); /* buffer to wait on */
254
255extern void pagebuf_iomove( /* move data in/out of pagebuf */
256 xfs_buf_t *, /* buffer to manipulate */
257 size_t, /* starting buffer offset */
258 size_t, /* length in buffer */
259 caddr_t, /* data pointer */
260 page_buf_rw_t); /* direction */
261
262static inline int pagebuf_iostrategy(xfs_buf_t *pb)
263{ 201{
264 return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); 202 return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
265} 203}
266 204
267static inline int pagebuf_geterror(xfs_buf_t *pb) 205static inline int xfs_buf_geterror(xfs_buf_t *bp)
268{ 206{
269 return pb ? pb->pb_error : ENOMEM; 207 return bp ? bp->b_error : ENOMEM;
270} 208}
271 209
272/* Buffer Utility Routines */ 210/* Buffer Utility Routines */
273 211extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
274extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
275 xfs_buf_t *, /* buffer to offset into */
276 size_t); /* offset */
277 212
278/* Pinning Buffer Storage in Memory */ 213/* Pinning Buffer Storage in Memory */
279 214extern void xfs_buf_pin(xfs_buf_t *);
280extern void pagebuf_pin( /* pin buffer in memory */ 215extern void xfs_buf_unpin(xfs_buf_t *);
281 xfs_buf_t *); /* buffer to pin */ 216extern int xfs_buf_ispin(xfs_buf_t *);
282
283extern void pagebuf_unpin( /* unpin buffered data */
284 xfs_buf_t *); /* buffer to unpin */
285
286extern int pagebuf_ispin( /* check if buffer is pinned */
287 xfs_buf_t *); /* buffer to check */
288 217
289/* Delayed Write Buffer Routines */ 218/* Delayed Write Buffer Routines */
290 219extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
291extern void pagebuf_delwri_dequeue(xfs_buf_t *);
292 220
293/* Buffer Daemon Setup Routines */ 221/* Buffer Daemon Setup Routines */
222extern int xfs_buf_init(void);
223extern void xfs_buf_terminate(void);
294 224
295extern int pagebuf_init(void); 225#ifdef XFS_BUF_TRACE
296extern void pagebuf_terminate(void); 226extern ktrace_t *xfs_buf_trace_buf;
297 227extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
298
299#ifdef PAGEBUF_TRACE
300extern ktrace_t *pagebuf_trace_buf;
301extern void pagebuf_trace(
302 xfs_buf_t *, /* buffer being traced */
303 char *, /* description of operation */
304 void *, /* arbitrary diagnostic value */
305 void *); /* return address */
306#else 228#else
307# define pagebuf_trace(pb, id, ptr, ra) do { } while (0) 229#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
308#endif 230#endif
309 231
310#define pagebuf_target_name(target) \ 232#define xfs_buf_target_name(target) \
311 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) 233 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
312 234
313 235
236#define XFS_B_ASYNC XBF_ASYNC
237#define XFS_B_DELWRI XBF_DELWRI
238#define XFS_B_READ XBF_READ
239#define XFS_B_WRITE XBF_WRITE
240#define XFS_B_STALE XBF_STALE
314 241
315/* These are just for xfs_syncsub... it sets an internal variable 242#define XFS_BUF_TRYLOCK XBF_TRYLOCK
316 * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t 243#define XFS_INCORE_TRYLOCK XBF_TRYLOCK
317 */ 244#define XFS_BUF_LOCK XBF_LOCK
318#define XFS_B_ASYNC PBF_ASYNC 245#define XFS_BUF_MAPPED XBF_MAPPED
319#define XFS_B_DELWRI PBF_DELWRI
320#define XFS_B_READ PBF_READ
321#define XFS_B_WRITE PBF_WRITE
322#define XFS_B_STALE PBF_STALE
323
324#define XFS_BUF_TRYLOCK PBF_TRYLOCK
325#define XFS_INCORE_TRYLOCK PBF_TRYLOCK
326#define XFS_BUF_LOCK PBF_LOCK
327#define XFS_BUF_MAPPED PBF_MAPPED
328
329#define BUF_BUSY PBF_DONT_BLOCK
330
331#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)
332#define XFS_BUF_ZEROFLAGS(x) \
333 ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
334
335#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE)
336#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE)
337#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE)
338#define XFS_BUF_SUPER_STALE(x) do { \
339 XFS_BUF_STALE(x); \
340 pagebuf_delwri_dequeue(x); \
341 XFS_BUF_DONE(x); \
342 } while (0)
343 246
344#define XFS_BUF_MANAGE PBF_FS_MANAGED 247#define BUF_BUSY XBF_DONT_BLOCK
345#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) 248
346 249#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
347#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) 250#define XFS_BUF_ZEROFLAGS(bp) \
348#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x) 251 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
349#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) 252
350 253#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE)
351#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) 254#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE)
352#define XFS_BUF_GETERROR(x) pagebuf_geterror(x) 255#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE)
353#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) 256#define XFS_BUF_SUPER_STALE(bp) do { \
354 257 XFS_BUF_STALE(bp); \
355#define XFS_BUF_DONE(x) ((x)->pb_flags |= PBF_DONE) 258 xfs_buf_delwri_dequeue(bp); \
356#define XFS_BUF_UNDONE(x) ((x)->pb_flags &= ~PBF_DONE) 259 XFS_BUF_DONE(bp); \
357#define XFS_BUF_ISDONE(x) ((x)->pb_flags & PBF_DONE) 260 } while (0)
358
359#define XFS_BUF_BUSY(x) do { } while (0)
360#define XFS_BUF_UNBUSY(x) do { } while (0)
361#define XFS_BUF_ISBUSY(x) (1)
362
363#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)
364#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
365#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
366
367#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)
368#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)
369#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)
370
371#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
372#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
373#define XFS_BUF_ISSHUT(x) (0)
374
375#define XFS_BUF_HOLD(x) pagebuf_hold(x)
376#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)
377#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)
378#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)
379
380#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)
381#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)
382#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)
383
384#define XFS_BUF_ISUNINITIAL(x) (0)
385#define XFS_BUF_UNUNINITIAL(x) (0)
386
387#define XFS_BUF_BP_ISMAPPED(bp) 1
388
389#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone
390#define XFS_BUF_SET_IODONE_FUNC(buf, func) \
391 (buf)->pb_iodone = (func)
392#define XFS_BUF_CLR_IODONE_FUNC(buf) \
393 (buf)->pb_iodone = NULL
394#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \
395 (buf)->pb_strat = (func)
396#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \
397 (buf)->pb_strat = NULL
398
399#define XFS_BUF_FSPRIVATE(buf, type) \
400 ((type)(buf)->pb_fspriv)
401#define XFS_BUF_SET_FSPRIVATE(buf, value) \
402 (buf)->pb_fspriv = (void *)(value)
403#define XFS_BUF_FSPRIVATE2(buf, type) \
404 ((type)(buf)->pb_fspriv2)
405#define XFS_BUF_SET_FSPRIVATE2(buf, value) \
406 (buf)->pb_fspriv2 = (void *)(value)
407#define XFS_BUF_FSPRIVATE3(buf, type) \
408 ((type)(buf)->pb_fspriv3)
409#define XFS_BUF_SET_FSPRIVATE3(buf, value) \
410 (buf)->pb_fspriv3 = (void *)(value)
411#define XFS_BUF_SET_START(buf)
412
413#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
414 (buf)->pb_relse = (value)
415
416#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
417
418static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
419{
420 if (bp->pb_flags & PBF_MAPPED)
421 return XFS_BUF_PTR(bp) + offset;
422 return (xfs_caddr_t) pagebuf_offset(bp, offset);
423}
424 261
425#define XFS_BUF_SET_PTR(bp, val, count) \ 262#define XFS_BUF_MANAGE XBF_FS_MANAGED
426 pagebuf_associate_memory(bp, val, count) 263#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
427#define XFS_BUF_ADDR(bp) ((bp)->pb_bn) 264
428#define XFS_BUF_SET_ADDR(bp, blk) \ 265#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
429 ((bp)->pb_bn = (xfs_daddr_t)(blk)) 266#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
430#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) 267#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
431#define XFS_BUF_SET_OFFSET(bp, off) \ 268
432 ((bp)->pb_file_offset = (off)) 269#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no)
433#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) 270#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp)
434#define XFS_BUF_SET_COUNT(bp, cnt) \ 271#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0)
435 ((bp)->pb_count_desired = (cnt)) 272
436#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) 273#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
437#define XFS_BUF_SET_SIZE(bp, cnt) \ 274#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)
438 ((bp)->pb_buffer_length = (cnt)) 275#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)
439#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) 276
440#define XFS_BUF_SET_VTYPE(bp, type) 277#define XFS_BUF_BUSY(bp) do { } while (0)
441#define XFS_BUF_SET_REF(bp, ref) 278#define XFS_BUF_UNBUSY(bp) do { } while (0)
442 279#define XFS_BUF_ISBUSY(bp) (1)
443#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) 280
444 281#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)
445#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) 282#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
446#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) 283#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
447#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) 284
448#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) 285#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
449#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); 286#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
450 287#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
451/* setup the buffer target from a buftarg structure */ 288
452#define XFS_BUF_SET_TARGET(bp, target) \ 289#define XFS_BUF_SHUT(bp) do { } while (0)
453 (bp)->pb_target = (target) 290#define XFS_BUF_UNSHUT(bp) do { } while (0)
454#define XFS_BUF_TARGET(bp) ((bp)->pb_target) 291#define XFS_BUF_ISSHUT(bp) (0)
455#define XFS_BUFTARG_NAME(target) \ 292
456 pagebuf_target_name(target) 293#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
457 294#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
458#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) 295#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
459#define XFS_BUF_SET_VTYPE(bp, type) 296#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)
460#define XFS_BUF_SET_REF(bp, ref) 297
461 298#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)
462static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) 299#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
300#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
301
302#define XFS_BUF_ISUNINITIAL(bp) (0)
303#define XFS_BUF_UNUNINITIAL(bp) (0)
304
305#define XFS_BUF_BP_ISMAPPED(bp) (1)
306
307#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
308#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
309#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
310#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
311#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
312
313#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
314#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
315#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
316#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
317#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)
318#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))
319#define XFS_BUF_SET_START(bp) do { } while (0)
320#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
321
322#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
323#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
324#define XFS_BUF_ADDR(bp) ((bp)->b_bn)
325#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))
326#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)
327#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))
328#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)
329#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))
330#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
331#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
332
333#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) do { } while (0)
334#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
335#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
336
337#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)
338
339#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
340#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
341#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
342#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
343#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
344
345#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
346#define XFS_BUF_TARGET(bp) ((bp)->b_target)
347#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
348
349static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
463{ 350{
464 bp->pb_fspriv3 = mp; 351 bp->b_fspriv3 = mp;
465 bp->pb_strat = xfs_bdstrat_cb; 352 bp->b_strat = xfs_bdstrat_cb;
466 pagebuf_delwri_dequeue(bp); 353 xfs_buf_delwri_dequeue(bp);
467 return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES); 354 return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
468} 355}
469 356
470static inline void xfs_buf_relse(xfs_buf_t *bp) 357static inline void xfs_buf_relse(xfs_buf_t *bp)
471{ 358{
472 if (!bp->pb_relse) 359 if (!bp->b_relse)
473 pagebuf_unlock(bp); 360 xfs_buf_unlock(bp);
474 pagebuf_rele(bp); 361 xfs_buf_rele(bp);
475} 362}
476 363
477#define xfs_bpin(bp) pagebuf_pin(bp) 364#define xfs_bpin(bp) xfs_buf_pin(bp)
478#define xfs_bunpin(bp) pagebuf_unpin(bp) 365#define xfs_bunpin(bp) xfs_buf_unpin(bp)
479 366
480#define xfs_buftrace(id, bp) \ 367#define xfs_buftrace(id, bp) \
481 pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) 368 xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
482 369
483#define xfs_biodone(pb) \ 370#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
484 pagebuf_iodone(pb, 0)
485 371
486#define xfs_biomove(pb, off, len, data, rw) \ 372#define xfs_biomove(bp, off, len, data, rw) \
487 pagebuf_iomove((pb), (off), (len), (data), \ 373 xfs_buf_iomove((bp), (off), (len), (data), \
488 ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ) 374 ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
489 375
490#define xfs_biozero(pb, off, len) \ 376#define xfs_biozero(bp, off, len) \
491 pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) 377 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
492 378
493 379
494static inline int XFS_bwrite(xfs_buf_t *pb) 380static inline int XFS_bwrite(xfs_buf_t *bp)
495{ 381{
496 int iowait = (pb->pb_flags & PBF_ASYNC) == 0; 382 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
497 int error = 0; 383 int error = 0;
498 384
499 if (!iowait) 385 if (!iowait)
500 pb->pb_flags |= _PBF_RUN_QUEUES; 386 bp->b_flags |= _XBF_RUN_QUEUES;
501 387
502 pagebuf_delwri_dequeue(pb); 388 xfs_buf_delwri_dequeue(bp);
503 pagebuf_iostrategy(pb); 389 xfs_buf_iostrategy(bp);
504 if (iowait) { 390 if (iowait) {
505 error = pagebuf_iowait(pb); 391 error = xfs_buf_iowait(bp);
506 xfs_buf_relse(pb); 392 xfs_buf_relse(bp);
507 } 393 }
508 return error; 394 return error;
509} 395}
510 396
511#define XFS_bdwrite(pb) \ 397#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
512 pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
513 398
514static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) 399static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
515{ 400{
516 bp->pb_strat = xfs_bdstrat_cb; 401 bp->b_strat = xfs_bdstrat_cb;
517 bp->pb_fspriv3 = mp; 402 bp->b_fspriv3 = mp;
518 403 return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
519 return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
520} 404}
521 405
522#define XFS_bdstrat(bp) pagebuf_iorequest(bp) 406#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
523 407
524#define xfs_iowait(pb) pagebuf_iowait(pb) 408#define xfs_iowait(bp) xfs_buf_iowait(bp)
525 409
526#define xfs_baread(target, rablkno, ralen) \ 410#define xfs_baread(target, rablkno, ralen) \
527 pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) 411 xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
528
529#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))
530#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
531#define xfs_buf_free(bp) pagebuf_free(bp)
532 412
533 413
534/* 414/*
535 * Handling of buftargs. 415 * Handling of buftargs.
536 */ 416 */
537
538extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 417extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
539extern void xfs_free_buftarg(xfs_buftarg_t *, int); 418extern void xfs_free_buftarg(xfs_buftarg_t *, int);
540extern void xfs_wait_buftarg(xfs_buftarg_t *); 419extern void xfs_wait_buftarg(xfs_buftarg_t *);
541extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 420extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
542extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 421extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
543 422
544#define xfs_getsize_buftarg(buftarg) \ 423#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
545 block_size((buftarg)->pbr_bdev) 424#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
546#define xfs_readonly_buftarg(buftarg) \ 425
547 bdev_read_only((buftarg)->pbr_bdev) 426#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
548#define xfs_binval(buftarg) \ 427#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
549 xfs_flush_buftarg(buftarg, 1)
550#define XFS_bflush(buftarg) \
551 xfs_flush_buftarg(buftarg, 1)
552 428
553#endif /* __XFS_BUF_H__ */ 429#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 4af491024727..e7f3da61c6c3 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -18,6 +18,8 @@
18#ifndef __XFS_CRED_H__ 18#ifndef __XFS_CRED_H__
19#define __XFS_CRED_H__ 19#define __XFS_CRED_H__
20 20
21#include <linux/capability.h>
22
21/* 23/*
22 * Credentials 24 * Credentials
23 */ 25 */
@@ -27,7 +29,7 @@ typedef struct cred {
27 29
28extern struct cred *sys_cred; 30extern struct cred *sys_cred;
29 31
30/* this is a hack.. (assums sys_cred is the only cred_t in the system) */ 32/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
31static __inline int capable_cred(cred_t *cr, int cid) 33static __inline int capable_cred(cred_t *cr, int cid)
32{ 34{
33 return (cr == sys_cred) ? 1 : capable(cid); 35 return (cr == sys_cred) ? 1 : capable(cid);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 06111d0bbae4..ced4404339c7 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -509,16 +509,14 @@ linvfs_open_exec(
509 vnode_t *vp = LINVFS_GET_VP(inode); 509 vnode_t *vp = LINVFS_GET_VP(inode);
510 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); 510 xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
511 int error = 0; 511 int error = 0;
512 bhv_desc_t *bdp;
513 xfs_inode_t *ip; 512 xfs_inode_t *ip;
514 513
515 if (vp->v_vfsp->vfs_flag & VFS_DMI) { 514 if (vp->v_vfsp->vfs_flag & VFS_DMI) {
516 bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); 515 ip = xfs_vtoi(vp);
517 if (!bdp) { 516 if (!ip) {
518 error = -EINVAL; 517 error = -EINVAL;
519 goto open_exec_out; 518 goto open_exec_out;
520 } 519 }
521 ip = XFS_BHVTOI(bdp);
522 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) { 520 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
523 error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 521 error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
524 0, 0, 0, NULL); 522 0, 0, 0, NULL);
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index f89340c61bf2..4fa4b1a5187e 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -79,8 +79,7 @@ fs_flushinval_pages(
79 struct inode *ip = LINVFS_GET_IP(vp); 79 struct inode *ip = LINVFS_GET_IP(vp);
80 80
81 if (VN_CACHED(vp)) { 81 if (VN_CACHED(vp)) {
82 filemap_fdatawrite(ip->i_mapping); 82 filemap_write_and_wait(ip->i_mapping);
83 filemap_fdatawait(ip->i_mapping);
84 83
85 truncate_inode_pages(ip->i_mapping, first); 84 truncate_inode_pages(ip->i_mapping, first);
86 } 85 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index b78b5eb9e96c..4db47790415c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -52,6 +52,7 @@
52#include "xfs_dfrag.h" 52#include "xfs_dfrag.h"
53#include "xfs_fsops.h" 53#include "xfs_fsops.h"
54 54
55#include <linux/capability.h>
55#include <linux/dcache.h> 56#include <linux/dcache.h>
56#include <linux/mount.h> 57#include <linux/mount.h>
57#include <linux/namei.h> 58#include <linux/namei.h>
@@ -145,13 +146,10 @@ xfs_find_handle(
145 146
146 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { 147 if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
147 xfs_inode_t *ip; 148 xfs_inode_t *ip;
148 bhv_desc_t *bhv;
149 int lock_mode; 149 int lock_mode;
150 150
151 /* need to get access to the xfs_inode to read the generation */ 151 /* need to get access to the xfs_inode to read the generation */
152 bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); 152 ip = xfs_vtoi(vp);
153 ASSERT(bhv);
154 ip = XFS_BHVTOI(bhv);
155 ASSERT(ip); 153 ASSERT(ip);
156 lock_mode = xfs_ilock_map_shared(ip); 154 lock_mode = xfs_ilock_map_shared(ip);
157 155
@@ -530,6 +528,8 @@ xfs_attrmulti_attr_set(
530 char *kbuf; 528 char *kbuf;
531 int error = EFAULT; 529 int error = EFAULT;
532 530
531 if (IS_RDONLY(&vp->v_inode))
532 return -EROFS;
533 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) 533 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
534 return EPERM; 534 return EPERM;
535 if (len > XATTR_SIZE_MAX) 535 if (len > XATTR_SIZE_MAX)
@@ -557,6 +557,9 @@ xfs_attrmulti_attr_remove(
557{ 557{
558 int error; 558 int error;
559 559
560
561 if (IS_RDONLY(&vp->v_inode))
562 return -EROFS;
560 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode)) 563 if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
561 return EPERM; 564 return EPERM;
562 565
@@ -745,9 +748,8 @@ xfs_ioctl(
745 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 748 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
746 mp->m_rtdev_targp : mp->m_ddev_targp; 749 mp->m_rtdev_targp : mp->m_ddev_targp;
747 750
748 da.d_mem = da.d_miniosz = 1 << target->pbr_sshift; 751 da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
749 /* The size dio will do in one go */ 752 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
750 da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
751 753
752 if (copy_to_user(arg, &da, sizeof(da))) 754 if (copy_to_user(arg, &da, sizeof(da)))
753 return -XFS_ERROR(EFAULT); 755 return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c83ae15bb0e6..a7c9ba1a9f7b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -19,7 +19,6 @@
19#include <linux/compat.h> 19#include <linux/compat.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/ioctl.h> 21#include <linux/ioctl.h>
22#include <linux/ioctl32.h>
23#include <linux/syscalls.h> 22#include <linux/syscalls.h>
24#include <linux/types.h> 23#include <linux/types.h>
25#include <linux/fs.h> 24#include <linux/fs.h>
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 14215a7db59f..76c6df34d0db 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -51,8 +51,44 @@
51#include "xfs_buf_item.h" 51#include "xfs_buf_item.h"
52#include "xfs_utils.h" 52#include "xfs_utils.h"
53 53
54#include <linux/capability.h>
54#include <linux/xattr.h> 55#include <linux/xattr.h>
55#include <linux/namei.h> 56#include <linux/namei.h>
57#include <linux/security.h>
58
59/*
60 * Get a XFS inode from a given vnode.
61 */
62xfs_inode_t *
63xfs_vtoi(
64 struct vnode *vp)
65{
66 bhv_desc_t *bdp;
67
68 bdp = bhv_lookup_range(VN_BHV_HEAD(vp),
69 VNODE_POSITION_XFS, VNODE_POSITION_XFS);
70 if (unlikely(bdp == NULL))
71 return NULL;
72 return XFS_BHVTOI(bdp);
73}
74
75/*
76 * Bring the atime in the XFS inode uptodate.
77 * Used before logging the inode to disk or when the Linux inode goes away.
78 */
79void
80xfs_synchronize_atime(
81 xfs_inode_t *ip)
82{
83 vnode_t *vp;
84
85 vp = XFS_ITOV_NULL(ip);
86 if (vp) {
87 struct inode *inode = &vp->v_inode;
88 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
89 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
90 }
91}
56 92
57/* 93/*
58 * Change the requested timestamp in the given inode. 94 * Change the requested timestamp in the given inode.
@@ -73,23 +109,6 @@ xfs_ichgtime(
73 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); 109 struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
74 timespec_t tv; 110 timespec_t tv;
75 111
76 /*
77 * We're not supposed to change timestamps in readonly-mounted
78 * filesystems. Throw it away if anyone asks us.
79 */
80 if (unlikely(IS_RDONLY(inode)))
81 return;
82
83 /*
84 * Don't update access timestamps on reads if mounted "noatime".
85 * Throw it away if anyone asks us.
86 */
87 if (unlikely(
88 (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
89 (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
90 XFS_ICHGTIME_ACC))
91 return;
92
93 nanotime(&tv); 112 nanotime(&tv);
94 if (flags & XFS_ICHGTIME_MOD) { 113 if (flags & XFS_ICHGTIME_MOD) {
95 inode->i_mtime = tv; 114 inode->i_mtime = tv;
@@ -126,8 +145,6 @@ xfs_ichgtime(
126 * Variant on the above which avoids querying the system clock 145 * Variant on the above which avoids querying the system clock
127 * in situations where we know the Linux inode timestamps have 146 * in situations where we know the Linux inode timestamps have
128 * just been updated (and so we can update our inode cheaply). 147 * just been updated (and so we can update our inode cheaply).
129 * We also skip the readonly and noatime checks here, they are
130 * also catered for already.
131 */ 148 */
132void 149void
133xfs_ichgtime_fast( 150xfs_ichgtime_fast(
@@ -138,20 +155,16 @@ xfs_ichgtime_fast(
138 timespec_t *tvp; 155 timespec_t *tvp;
139 156
140 /* 157 /*
141 * We're not supposed to change timestamps in readonly-mounted 158 * Atime updates for read() & friends are handled lazily now, and
142 * filesystems. Throw it away if anyone asks us. 159 * explicit updates must go through xfs_ichgtime()
143 */ 160 */
144 if (unlikely(IS_RDONLY(inode))) 161 ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
145 return;
146 162
147 /* 163 /*
148 * Don't update access timestamps on reads if mounted "noatime". 164 * We're not supposed to change timestamps in readonly-mounted
149 * Throw it away if anyone asks us. 165 * filesystems. Throw it away if anyone asks us.
150 */ 166 */
151 if (unlikely( 167 if (unlikely(IS_RDONLY(inode)))
152 (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
153 ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
154 XFS_ICHGTIME_ACC)))
155 return; 168 return;
156 169
157 if (flags & XFS_ICHGTIME_MOD) { 170 if (flags & XFS_ICHGTIME_MOD) {
@@ -159,11 +172,6 @@ xfs_ichgtime_fast(
159 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; 172 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
160 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec; 173 ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
161 } 174 }
162 if (flags & XFS_ICHGTIME_ACC) {
163 tvp = &inode->i_atime;
164 ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;
165 ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;
166 }
167 if (flags & XFS_ICHGTIME_CHG) { 175 if (flags & XFS_ICHGTIME_CHG) {
168 tvp = &inode->i_ctime; 176 tvp = &inode->i_ctime;
169 ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec; 177 ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
@@ -203,13 +211,46 @@ validate_fields(
203 ip->i_nlink = va.va_nlink; 211 ip->i_nlink = va.va_nlink;
204 ip->i_blocks = va.va_nblocks; 212 ip->i_blocks = va.va_nblocks;
205 213
206 /* we're under i_sem so i_size can't change under us */ 214 /* we're under i_mutex so i_size can't change under us */
207 if (i_size_read(ip) != va.va_size) 215 if (i_size_read(ip) != va.va_size)
208 i_size_write(ip, va.va_size); 216 i_size_write(ip, va.va_size);
209 } 217 }
210} 218}
211 219
212/* 220/*
221 * Hook in SELinux. This is not quite correct yet, what we really need
222 * here (as we do for default ACLs) is a mechanism by which creation of
223 * these attrs can be journalled at inode creation time (along with the
224 * inode, of course, such that log replay can't cause these to be lost).
225 */
226STATIC int
227linvfs_init_security(
228 struct vnode *vp,
229 struct inode *dir)
230{
231 struct inode *ip = LINVFS_GET_IP(vp);
232 size_t length;
233 void *value;
234 char *name;
235 int error;
236
237 error = security_inode_init_security(ip, dir, &name, &value, &length);
238 if (error) {
239 if (error == -EOPNOTSUPP)
240 return 0;
241 return -error;
242 }
243
244 VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
245 if (!error)
246 VMODIFY(vp);
247
248 kfree(name);
249 kfree(value);
250 return error;
251}
252
253/*
213 * Determine whether a process has a valid fs_struct (kernel daemons 254 * Determine whether a process has a valid fs_struct (kernel daemons
214 * like knfsd don't have an fs_struct). 255 * like knfsd don't have an fs_struct).
215 * 256 *
@@ -274,6 +315,9 @@ linvfs_mknod(
274 break; 315 break;
275 } 316 }
276 317
318 if (!error)
319 error = linvfs_init_security(vp, dir);
320
277 if (default_acl) { 321 if (default_acl) {
278 if (!error) { 322 if (!error) {
279 error = _ACL_INHERIT(vp, &va, default_acl); 323 error = _ACL_INHERIT(vp, &va, default_acl);
@@ -290,8 +334,6 @@ linvfs_mknod(
290 teardown.d_inode = ip = LINVFS_GET_IP(vp); 334 teardown.d_inode = ip = LINVFS_GET_IP(vp);
291 teardown.d_name = dentry->d_name; 335 teardown.d_name = dentry->d_name;
292 336
293 vn_mark_bad(vp);
294
295 if (S_ISDIR(mode)) 337 if (S_ISDIR(mode))
296 VOP_RMDIR(dvp, &teardown, NULL, err2); 338 VOP_RMDIR(dvp, &teardown, NULL, err2);
297 else 339 else
@@ -429,11 +471,14 @@ linvfs_symlink(
429 471
430 error = 0; 472 error = 0;
431 VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); 473 VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
432 if (!error && cvp) { 474 if (likely(!error && cvp)) {
433 ip = LINVFS_GET_IP(cvp); 475 error = linvfs_init_security(cvp, dir);
434 d_instantiate(dentry, ip); 476 if (likely(!error)) {
435 validate_fields(dir); 477 ip = LINVFS_GET_IP(cvp);
436 validate_fields(ip); /* size needs update */ 478 d_instantiate(dentry, ip);
479 validate_fields(dir);
480 validate_fields(ip);
481 }
437 } 482 }
438 return -error; 483 return -error;
439} 484}
@@ -502,7 +547,7 @@ linvfs_follow_link(
502 ASSERT(dentry); 547 ASSERT(dentry);
503 ASSERT(nd); 548 ASSERT(nd);
504 549
505 link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); 550 link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
506 if (!link) { 551 if (!link) {
507 nd_set_link(nd, ERR_PTR(-ENOMEM)); 552 nd_set_link(nd, ERR_PTR(-ENOMEM));
508 return NULL; 553 return NULL;
@@ -518,12 +563,12 @@ linvfs_follow_link(
518 vp = LINVFS_GET_VP(dentry->d_inode); 563 vp = LINVFS_GET_VP(dentry->d_inode);
519 564
520 iov.iov_base = link; 565 iov.iov_base = link;
521 iov.iov_len = MAXNAMELEN; 566 iov.iov_len = MAXPATHLEN;
522 567
523 uio->uio_iov = &iov; 568 uio->uio_iov = &iov;
524 uio->uio_offset = 0; 569 uio->uio_offset = 0;
525 uio->uio_segflg = UIO_SYSSPACE; 570 uio->uio_segflg = UIO_SYSSPACE;
526 uio->uio_resid = MAXNAMELEN; 571 uio->uio_resid = MAXPATHLEN;
527 uio->uio_iovcnt = 1; 572 uio->uio_iovcnt = 1;
528 573
529 VOP_READLINK(vp, uio, 0, NULL, error); 574 VOP_READLINK(vp, uio, 0, NULL, error);
@@ -531,7 +576,7 @@ linvfs_follow_link(
531 kfree(link); 576 kfree(link);
532 link = ERR_PTR(-error); 577 link = ERR_PTR(-error);
533 } else { 578 } else {
534 link[MAXNAMELEN - uio->uio_resid] = '\0'; 579 link[MAXPATHLEN - uio->uio_resid] = '\0';
535 } 580 }
536 kfree(uio); 581 kfree(uio);
537 582
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index ee784b63acbf..6899a6b4a50a 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations;
26extern struct file_operations linvfs_invis_file_operations; 26extern struct file_operations linvfs_invis_file_operations;
27extern struct file_operations linvfs_dir_operations; 27extern struct file_operations linvfs_dir_operations;
28 28
29extern struct address_space_operations linvfs_aops;
30
31extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
32extern void linvfs_unwritten_done(struct buffer_head *, int);
33
34extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, 29extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
35 int, unsigned int, void __user *); 30 int, unsigned int, void __user *);
36 31
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index d8e21ba0cccc..67389b745526 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -110,10 +110,6 @@
110 * delalloc and these ondisk-uninitialised buffers. 110 * delalloc and these ondisk-uninitialised buffers.
111 */ 111 */
112BUFFER_FNS(PrivateStart, unwritten); 112BUFFER_FNS(PrivateStart, unwritten);
113static inline void set_buffer_unwritten_io(struct buffer_head *bh)
114{
115 bh->b_end_io = linvfs_unwritten_done;
116}
117 113
118#define restricted_chown xfs_params.restrict_chown.val 114#define restricted_chown xfs_params.restrict_chown.val
119#define irix_sgid_inherit xfs_params.sgid_inherit.val 115#define irix_sgid_inherit xfs_params.sgid_inherit.val
@@ -232,7 +228,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
232#define xfs_itruncate_data(ip, off) \ 228#define xfs_itruncate_data(ip, off) \
233 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) 229 (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
234#define xfs_statvfs_fsid(statp, mp) \ 230#define xfs_statvfs_fsid(statp, mp) \
235 ({ u64 id = huge_encode_dev((mp)->m_dev); \ 231 ({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
236 __kernel_fsid_t *fsid = &(statp)->f_fsid; \ 232 __kernel_fsid_t *fsid = &(statp)->f_fsid; \
237 (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); }) 233 (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
238 234
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 279e9bc92aba..e0ab45fbfebd 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -233,8 +233,8 @@ xfs_read(
233 xfs_buftarg_t *target = 233 xfs_buftarg_t *target =
234 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 234 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
235 mp->m_rtdev_targp : mp->m_ddev_targp; 235 mp->m_rtdev_targp : mp->m_ddev_targp;
236 if ((*offset & target->pbr_smask) || 236 if ((*offset & target->bt_smask) ||
237 (size & target->pbr_smask)) { 237 (size & target->bt_smask)) {
238 if (*offset == ip->i_d.di_size) { 238 if (*offset == ip->i_d.di_size) {
239 return (0); 239 return (0);
240 } 240 }
@@ -254,7 +254,7 @@ xfs_read(
254 } 254 }
255 255
256 if (unlikely(ioflags & IO_ISDIRECT)) 256 if (unlikely(ioflags & IO_ISDIRECT))
257 down(&inode->i_sem); 257 mutex_lock(&inode->i_mutex);
258 xfs_ilock(ip, XFS_IOLOCK_SHARED); 258 xfs_ilock(ip, XFS_IOLOCK_SHARED);
259 259
260 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && 260 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
@@ -281,12 +281,9 @@ xfs_read(
281 281
282 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 282 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
283 283
284 if (likely(!(ioflags & IO_INVIS)))
285 xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC);
286
287unlock_isem: 284unlock_isem:
288 if (unlikely(ioflags & IO_ISDIRECT)) 285 if (unlikely(ioflags & IO_ISDIRECT))
289 up(&inode->i_sem); 286 mutex_unlock(&inode->i_mutex);
290 return ret; 287 return ret;
291} 288}
292 289
@@ -346,9 +343,6 @@ xfs_sendfile(
346 if (ret > 0) 343 if (ret > 0)
347 XFS_STATS_ADD(xs_read_bytes, ret); 344 XFS_STATS_ADD(xs_read_bytes, ret);
348 345
349 if (likely(!(ioflags & IO_INVIS)))
350 xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC);
351
352 return ret; 346 return ret;
353} 347}
354 348
@@ -362,7 +356,6 @@ STATIC int /* error (positive) */
362xfs_zero_last_block( 356xfs_zero_last_block(
363 struct inode *ip, 357 struct inode *ip,
364 xfs_iocore_t *io, 358 xfs_iocore_t *io,
365 xfs_off_t offset,
366 xfs_fsize_t isize, 359 xfs_fsize_t isize,
367 xfs_fsize_t end_size) 360 xfs_fsize_t end_size)
368{ 361{
@@ -371,19 +364,16 @@ xfs_zero_last_block(
371 int nimaps; 364 int nimaps;
372 int zero_offset; 365 int zero_offset;
373 int zero_len; 366 int zero_len;
374 int isize_fsb_offset;
375 int error = 0; 367 int error = 0;
376 xfs_bmbt_irec_t imap; 368 xfs_bmbt_irec_t imap;
377 loff_t loff; 369 loff_t loff;
378 size_t lsize;
379 370
380 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); 371 ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
381 ASSERT(offset > isize);
382 372
383 mp = io->io_mount; 373 mp = io->io_mount;
384 374
385 isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize); 375 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
386 if (isize_fsb_offset == 0) { 376 if (zero_offset == 0) {
387 /* 377 /*
388 * There are no extra bytes in the last block on disk to 378 * There are no extra bytes in the last block on disk to
389 * zero, so return. 379 * zero, so return.
@@ -413,10 +403,8 @@ xfs_zero_last_block(
413 */ 403 */
414 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 404 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
415 loff = XFS_FSB_TO_B(mp, last_fsb); 405 loff = XFS_FSB_TO_B(mp, last_fsb);
416 lsize = XFS_FSB_TO_B(mp, 1);
417 406
418 zero_offset = isize_fsb_offset; 407 zero_len = mp->m_sb.sb_blocksize - zero_offset;
419 zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
420 408
421 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); 409 error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
422 410
@@ -447,20 +435,17 @@ xfs_zero_eof(
447 struct inode *ip = LINVFS_GET_IP(vp); 435 struct inode *ip = LINVFS_GET_IP(vp);
448 xfs_fileoff_t start_zero_fsb; 436 xfs_fileoff_t start_zero_fsb;
449 xfs_fileoff_t end_zero_fsb; 437 xfs_fileoff_t end_zero_fsb;
450 xfs_fileoff_t prev_zero_fsb;
451 xfs_fileoff_t zero_count_fsb; 438 xfs_fileoff_t zero_count_fsb;
452 xfs_fileoff_t last_fsb; 439 xfs_fileoff_t last_fsb;
453 xfs_extlen_t buf_len_fsb; 440 xfs_extlen_t buf_len_fsb;
454 xfs_extlen_t prev_zero_count;
455 xfs_mount_t *mp; 441 xfs_mount_t *mp;
456 int nimaps; 442 int nimaps;
457 int error = 0; 443 int error = 0;
458 xfs_bmbt_irec_t imap; 444 xfs_bmbt_irec_t imap;
459 loff_t loff;
460 size_t lsize;
461 445
462 ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); 446 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
463 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); 447 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
448 ASSERT(offset > isize);
464 449
465 mp = io->io_mount; 450 mp = io->io_mount;
466 451
@@ -468,7 +453,7 @@ xfs_zero_eof(
468 * First handle zeroing the block on which isize resides. 453 * First handle zeroing the block on which isize resides.
469 * We only zero a part of that block so it is handled specially. 454 * We only zero a part of that block so it is handled specially.
470 */ 455 */
471 error = xfs_zero_last_block(ip, io, offset, isize, end_size); 456 error = xfs_zero_last_block(ip, io, isize, end_size);
472 if (error) { 457 if (error) {
473 ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); 458 ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
474 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); 459 ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -496,8 +481,6 @@ xfs_zero_eof(
496 } 481 }
497 482
498 ASSERT(start_zero_fsb <= end_zero_fsb); 483 ASSERT(start_zero_fsb <= end_zero_fsb);
499 prev_zero_fsb = NULLFILEOFF;
500 prev_zero_count = 0;
501 while (start_zero_fsb <= end_zero_fsb) { 484 while (start_zero_fsb <= end_zero_fsb) {
502 nimaps = 1; 485 nimaps = 1;
503 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; 486 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
@@ -519,10 +502,7 @@ xfs_zero_eof(
519 * that sits on a hole and sets the page as P_HOLE 502 * that sits on a hole and sets the page as P_HOLE
520 * and calls remapf if it is a mapped file. 503 * and calls remapf if it is a mapped file.
521 */ 504 */
522 prev_zero_fsb = NULLFILEOFF; 505 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
523 prev_zero_count = 0;
524 start_zero_fsb = imap.br_startoff +
525 imap.br_blockcount;
526 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 506 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
527 continue; 507 continue;
528 } 508 }
@@ -543,17 +523,15 @@ xfs_zero_eof(
543 */ 523 */
544 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 524 XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
545 525
546 loff = XFS_FSB_TO_B(mp, start_zero_fsb); 526 error = xfs_iozero(ip,
547 lsize = XFS_FSB_TO_B(mp, buf_len_fsb); 527 XFS_FSB_TO_B(mp, start_zero_fsb),
548 528 XFS_FSB_TO_B(mp, buf_len_fsb),
549 error = xfs_iozero(ip, loff, lsize, end_size); 529 end_size);
550 530
551 if (error) { 531 if (error) {
552 goto out_lock; 532 goto out_lock;
553 } 533 }
554 534
555 prev_zero_fsb = start_zero_fsb;
556 prev_zero_count = buf_len_fsb;
557 start_zero_fsb = imap.br_startoff + buf_len_fsb; 535 start_zero_fsb = imap.br_startoff + buf_len_fsb;
558 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 536 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
559 537
@@ -640,7 +618,7 @@ xfs_write(
640 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 618 (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
641 mp->m_rtdev_targp : mp->m_ddev_targp; 619 mp->m_rtdev_targp : mp->m_ddev_targp;
642 620
643 if ((pos & target->pbr_smask) || (count & target->pbr_smask)) 621 if ((pos & target->bt_smask) || (count & target->bt_smask))
644 return XFS_ERROR(-EINVAL); 622 return XFS_ERROR(-EINVAL);
645 623
646 if (!VN_CACHED(vp) && pos < i_size_read(inode)) 624 if (!VN_CACHED(vp) && pos < i_size_read(inode))
@@ -655,7 +633,7 @@ relock:
655 iolock = XFS_IOLOCK_EXCL; 633 iolock = XFS_IOLOCK_EXCL;
656 locktype = VRWLOCK_WRITE; 634 locktype = VRWLOCK_WRITE;
657 635
658 down(&inode->i_sem); 636 mutex_lock(&inode->i_mutex);
659 } else { 637 } else {
660 iolock = XFS_IOLOCK_SHARED; 638 iolock = XFS_IOLOCK_SHARED;
661 locktype = VRWLOCK_WRITE_DIRECT; 639 locktype = VRWLOCK_WRITE_DIRECT;
@@ -686,7 +664,7 @@ start:
686 int dmflags = FILP_DELAY_FLAG(file); 664 int dmflags = FILP_DELAY_FLAG(file);
687 665
688 if (need_isem) 666 if (need_isem)
689 dmflags |= DM_FLAGS_ISEM; 667 dmflags |= DM_FLAGS_IMUX;
690 668
691 xfs_iunlock(xip, XFS_ILOCK_EXCL); 669 xfs_iunlock(xip, XFS_ILOCK_EXCL);
692 error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, 670 error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
@@ -713,7 +691,7 @@ start:
713 } 691 }
714 692
715 if (likely(!(ioflags & IO_INVIS))) { 693 if (likely(!(ioflags & IO_INVIS))) {
716 inode_update_time(inode, 1); 694 file_update_time(file);
717 xfs_ichgtime_fast(xip, inode, 695 xfs_ichgtime_fast(xip, inode,
718 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 696 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
719 } 697 }
@@ -772,7 +750,7 @@ retry:
772 if (need_isem) { 750 if (need_isem) {
773 /* demote the lock now the cached pages are gone */ 751 /* demote the lock now the cached pages are gone */
774 XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); 752 XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
775 up(&inode->i_sem); 753 mutex_unlock(&inode->i_mutex);
776 754
777 iolock = XFS_IOLOCK_SHARED; 755 iolock = XFS_IOLOCK_SHARED;
778 locktype = VRWLOCK_WRITE_DIRECT; 756 locktype = VRWLOCK_WRITE_DIRECT;
@@ -817,20 +795,24 @@ retry:
817 795
818 xfs_rwunlock(bdp, locktype); 796 xfs_rwunlock(bdp, locktype);
819 if (need_isem) 797 if (need_isem)
820 up(&inode->i_sem); 798 mutex_unlock(&inode->i_mutex);
821 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, 799 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
822 DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 800 DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
823 0, 0, 0); /* Delay flag intentionally unused */ 801 0, 0, 0); /* Delay flag intentionally unused */
824 if (error) 802 if (error)
825 goto out_nounlocks; 803 goto out_nounlocks;
826 if (need_isem) 804 if (need_isem)
827 down(&inode->i_sem); 805 mutex_lock(&inode->i_mutex);
828 xfs_rwlock(bdp, locktype); 806 xfs_rwlock(bdp, locktype);
829 pos = xip->i_d.di_size; 807 pos = xip->i_d.di_size;
830 ret = 0; 808 ret = 0;
831 goto retry; 809 goto retry;
832 } 810 }
833 811
812 isize = i_size_read(inode);
813 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
814 *offset = isize;
815
834 if (*offset > xip->i_d.di_size) { 816 if (*offset > xip->i_d.di_size) {
835 xfs_ilock(xip, XFS_ILOCK_EXCL); 817 xfs_ilock(xip, XFS_ILOCK_EXCL);
836 if (*offset > xip->i_d.di_size) { 818 if (*offset > xip->i_d.di_size) {
@@ -926,7 +908,7 @@ retry:
926 908
927 xfs_rwunlock(bdp, locktype); 909 xfs_rwunlock(bdp, locktype);
928 if (need_isem) 910 if (need_isem)
929 up(&inode->i_sem); 911 mutex_unlock(&inode->i_mutex);
930 912
931 error = sync_page_range(inode, mapping, pos, ret); 913 error = sync_page_range(inode, mapping, pos, ret);
932 if (!error) 914 if (!error)
@@ -938,7 +920,7 @@ retry:
938 xfs_rwunlock(bdp, locktype); 920 xfs_rwunlock(bdp, locktype);
939 out_unlock_isem: 921 out_unlock_isem:
940 if (need_isem) 922 if (need_isem)
941 up(&inode->i_sem); 923 mutex_unlock(&inode->i_mutex);
942 out_nounlocks: 924 out_nounlocks:
943 return -error; 925 return -error;
944} 926}
@@ -956,7 +938,7 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
956 938
957 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); 939 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
958 if (!XFS_FORCED_SHUTDOWN(mp)) { 940 if (!XFS_FORCED_SHUTDOWN(mp)) {
959 pagebuf_iorequest(bp); 941 xfs_buf_iorequest(bp);
960 return 0; 942 return 0;
961 } else { 943 } else {
962 xfs_buftrace("XFS__BDSTRAT IOERROR", bp); 944 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
@@ -1009,7 +991,7 @@ xfsbdstrat(
1009 * if (XFS_BUF_IS_GRIO(bp)) { 991 * if (XFS_BUF_IS_GRIO(bp)) {
1010 */ 992 */
1011 993
1012 pagebuf_iorequest(bp); 994 xfs_buf_iorequest(bp);
1013 return 0; 995 return 0;
1014 } 996 }
1015 997
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 6c40a74be7c8..8955720a2c6b 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -34,7 +34,7 @@ xfs_read_xfsstats(
34 __uint64_t xs_write_bytes = 0; 34 __uint64_t xs_write_bytes = 0;
35 __uint64_t xs_read_bytes = 0; 35 __uint64_t xs_read_bytes = 0;
36 36
37 static struct xstats_entry { 37 static const struct xstats_entry {
38 char *desc; 38 char *desc;
39 int endpoint; 39 int endpoint;
40 } xstats[] = { 40 } xstats[] = {
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index 50027c4a5618..8ba7a2fa6c1d 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -109,15 +109,15 @@ struct xfsstats {
109 __uint32_t vn_remove; /* # times vn_remove called */ 109 __uint32_t vn_remove; /* # times vn_remove called */
110 __uint32_t vn_free; /* # times vn_free called */ 110 __uint32_t vn_free; /* # times vn_free called */
111#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) 111#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
112 __uint32_t pb_get; 112 __uint32_t xb_get;
113 __uint32_t pb_create; 113 __uint32_t xb_create;
114 __uint32_t pb_get_locked; 114 __uint32_t xb_get_locked;
115 __uint32_t pb_get_locked_waited; 115 __uint32_t xb_get_locked_waited;
116 __uint32_t pb_busy_locked; 116 __uint32_t xb_busy_locked;
117 __uint32_t pb_miss_locked; 117 __uint32_t xb_miss_locked;
118 __uint32_t pb_page_retries; 118 __uint32_t xb_page_retries;
119 __uint32_t pb_page_found; 119 __uint32_t xb_page_found;
120 __uint32_t pb_get_read; 120 __uint32_t xb_get_read;
121/* Extra precision counters */ 121/* Extra precision counters */
122 __uint64_t xs_xstrat_bytes; 122 __uint64_t xs_xstrat_bytes;
123 __uint64_t xs_write_bytes; 123 __uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 6116b5bf433e..f22e426d9e42 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -306,13 +306,15 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
306 xfs_fs_cmn_err(CE_NOTE, mp, 306 xfs_fs_cmn_err(CE_NOTE, mp,
307 "Disabling barriers, not supported with external log device"); 307 "Disabling barriers, not supported with external log device");
308 mp->m_flags &= ~XFS_MOUNT_BARRIER; 308 mp->m_flags &= ~XFS_MOUNT_BARRIER;
309 return;
309 } 310 }
310 311
311 if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered == 312 if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
312 QUEUE_ORDERED_NONE) { 313 QUEUE_ORDERED_NONE) {
313 xfs_fs_cmn_err(CE_NOTE, mp, 314 xfs_fs_cmn_err(CE_NOTE, mp,
314 "Disabling barriers, not supported by the underlying device"); 315 "Disabling barriers, not supported by the underlying device");
315 mp->m_flags &= ~XFS_MOUNT_BARRIER; 316 mp->m_flags &= ~XFS_MOUNT_BARRIER;
317 return;
316 } 318 }
317 319
318 error = xfs_barrier_test(mp); 320 error = xfs_barrier_test(mp);
@@ -320,6 +322,7 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
320 xfs_fs_cmn_err(CE_NOTE, mp, 322 xfs_fs_cmn_err(CE_NOTE, mp,
321 "Disabling barriers, trial barrier write failed"); 323 "Disabling barriers, trial barrier write failed");
322 mp->m_flags &= ~XFS_MOUNT_BARRIER; 324 mp->m_flags &= ~XFS_MOUNT_BARRIER;
325 return;
323 } 326 }
324} 327}
325 328
@@ -327,7 +330,7 @@ void
327xfs_blkdev_issue_flush( 330xfs_blkdev_issue_flush(
328 xfs_buftarg_t *buftarg) 331 xfs_buftarg_t *buftarg)
329{ 332{
330 blkdev_issue_flush(buftarg->pbr_bdev, NULL); 333 blkdev_issue_flush(buftarg->bt_bdev, NULL);
331} 334}
332 335
333STATIC struct inode * 336STATIC struct inode *
@@ -576,7 +579,7 @@ xfssyncd(
576 timeleft = schedule_timeout_interruptible(timeleft); 579 timeleft = schedule_timeout_interruptible(timeleft);
577 /* swsusp */ 580 /* swsusp */
578 try_to_freeze(); 581 try_to_freeze();
579 if (kthread_should_stop()) 582 if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
580 break; 583 break;
581 584
582 spin_lock(&vfsp->vfs_sync_lock); 585 spin_lock(&vfsp->vfs_sync_lock);
@@ -966,9 +969,9 @@ init_xfs_fs( void )
966 if (error < 0) 969 if (error < 0)
967 goto undo_zones; 970 goto undo_zones;
968 971
969 error = pagebuf_init(); 972 error = xfs_buf_init();
970 if (error < 0) 973 if (error < 0)
971 goto undo_pagebuf; 974 goto undo_buffers;
972 975
973 vn_init(); 976 vn_init();
974 xfs_init(); 977 xfs_init();
@@ -982,9 +985,9 @@ init_xfs_fs( void )
982 return 0; 985 return 0;
983 986
984undo_register: 987undo_register:
985 pagebuf_terminate(); 988 xfs_buf_terminate();
986 989
987undo_pagebuf: 990undo_buffers:
988 linvfs_destroy_zones(); 991 linvfs_destroy_zones();
989 992
990undo_zones: 993undo_zones:
@@ -998,7 +1001,7 @@ exit_xfs_fs( void )
998 XFS_DM_EXIT(&xfs_fs_type); 1001 XFS_DM_EXIT(&xfs_fs_type);
999 unregister_filesystem(&xfs_fs_type); 1002 unregister_filesystem(&xfs_fs_type);
1000 xfs_cleanup(); 1003 xfs_cleanup();
1001 pagebuf_terminate(); 1004 xfs_buf_terminate();
1002 linvfs_destroy_zones(); 1005 linvfs_destroy_zones();
1003 ktrace_uninit(); 1006 ktrace_uninit();
1004} 1007}
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index e9bbcb4d6243..260dd8415dd7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -106,7 +106,6 @@ vn_revalidate_core(
106 inode->i_blocks = vap->va_nblocks; 106 inode->i_blocks = vap->va_nblocks;
107 inode->i_mtime = vap->va_mtime; 107 inode->i_mtime = vap->va_mtime;
108 inode->i_ctime = vap->va_ctime; 108 inode->i_ctime = vap->va_ctime;
109 inode->i_atime = vap->va_atime;
110 inode->i_blksize = vap->va_blocksize; 109 inode->i_blksize = vap->va_blocksize;
111 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 110 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
112 inode->i_flags |= S_IMMUTABLE; 111 inode->i_flags |= S_IMMUTABLE;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index f2bbb327c081..0fe2419461d6 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -566,6 +566,25 @@ static inline int VN_BAD(struct vnode *vp)
566} 566}
567 567
568/* 568/*
569 * Extracting atime values in various formats
570 */
571static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
572{
573 bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
574 bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
575}
576
577static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
578{
579 *ts = vp->v_inode.i_atime;
580}
581
582static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
583{
584 *tt = vp->v_inode.i_atime.tv_sec;
585}
586
587/*
569 * Some useful predicates. 588 * Some useful predicates.
570 */ 589 */
571#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping) 590#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 00b5043dfa5a..772ac48329ea 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -104,7 +104,7 @@ xfs_qm_dqinit(
104 */ 104 */
105 if (brandnewdquot) { 105 if (brandnewdquot) {
106 dqp->dq_flnext = dqp->dq_flprev = dqp; 106 dqp->dq_flnext = dqp->dq_flprev = dqp;
107 mutex_init(&dqp->q_qlock, MUTEX_DEFAULT, "xdq"); 107 mutex_init(&dqp->q_qlock);
108 initnsema(&dqp->q_flock, 1, "fdq"); 108 initnsema(&dqp->q_flock, 1, "fdq");
109 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); 109 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
110 110
@@ -1382,7 +1382,7 @@ void
1382xfs_dqlock( 1382xfs_dqlock(
1383 xfs_dquot_t *dqp) 1383 xfs_dquot_t *dqp)
1384{ 1384{
1385 mutex_lock(&(dqp->q_qlock), PINOD); 1385 mutex_lock(&(dqp->q_qlock));
1386} 1386}
1387 1387
1388void 1388void
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2f69822344e5..2ec6b441849c 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -239,7 +239,7 @@ xfs_qm_dquot_logitem_pushbuf(
239 * trying to duplicate our effort. 239 * trying to duplicate our effort.
240 */ 240 */
241 ASSERT(qip->qli_pushbuf_flag != 0); 241 ASSERT(qip->qli_pushbuf_flag != 0);
242 ASSERT(qip->qli_push_owner == get_thread_id()); 242 ASSERT(qip->qli_push_owner == current_pid());
243 243
244 /* 244 /*
245 * If flushlock isn't locked anymore, chances are that the 245 * If flushlock isn't locked anymore, chances are that the
@@ -333,7 +333,7 @@ xfs_qm_dquot_logitem_trylock(
333 qip->qli_pushbuf_flag = 1; 333 qip->qli_pushbuf_flag = 1;
334 ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno); 334 ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
335#ifdef DEBUG 335#ifdef DEBUG
336 qip->qli_push_owner = get_thread_id(); 336 qip->qli_push_owner = current_pid();
337#endif 337#endif
338 /* 338 /*
339 * The dquot is left locked. 339 * The dquot is left locked.
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5328a2937127..53a00fb217fa 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -167,7 +167,7 @@ xfs_Gqm_init(void)
167 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; 167 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
168 xqm->qm_nrefs = 0; 168 xqm->qm_nrefs = 0;
169#ifdef DEBUG 169#ifdef DEBUG
170 mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk"); 170 mutex_init(&qcheck_lock);
171#endif 171#endif
172 return xqm; 172 return xqm;
173} 173}
@@ -497,7 +497,7 @@ xfs_qm_dqflush_all(
497 int error; 497 int error;
498 498
499 if (mp->m_quotainfo == NULL) 499 if (mp->m_quotainfo == NULL)
500 return (0); 500 return 0;
501 niters = 0; 501 niters = 0;
502again: 502again:
503 xfs_qm_mplist_lock(mp); 503 xfs_qm_mplist_lock(mp);
@@ -528,7 +528,7 @@ again:
528 error = xfs_qm_dqflush(dqp, flags); 528 error = xfs_qm_dqflush(dqp, flags);
529 xfs_dqunlock(dqp); 529 xfs_dqunlock(dqp);
530 if (error) 530 if (error)
531 return (error); 531 return error;
532 532
533 xfs_qm_mplist_lock(mp); 533 xfs_qm_mplist_lock(mp);
534 if (recl != XFS_QI_MPLRECLAIMS(mp)) { 534 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
@@ -540,7 +540,7 @@ again:
540 540
541 xfs_qm_mplist_unlock(mp); 541 xfs_qm_mplist_unlock(mp);
542 /* return ! busy */ 542 /* return ! busy */
543 return (0); 543 return 0;
544} 544}
545/* 545/*
546 * Release the group dquot pointers the user dquots may be 546 * Release the group dquot pointers the user dquots may be
@@ -599,7 +599,7 @@ xfs_qm_dqpurge_int(
599 int nmisses; 599 int nmisses;
600 600
601 if (mp->m_quotainfo == NULL) 601 if (mp->m_quotainfo == NULL)
602 return (0); 602 return 0;
603 603
604 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; 604 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
605 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; 605 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
@@ -796,7 +796,7 @@ xfs_qm_dqattach_one(
796 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 796 ASSERT(XFS_DQ_IS_LOCKED(dqp));
797 } 797 }
798#endif 798#endif
799 return (error); 799 return error;
800} 800}
801 801
802 802
@@ -897,7 +897,7 @@ xfs_qm_dqattach(
897 (! XFS_NOT_DQATTACHED(mp, ip)) || 897 (! XFS_NOT_DQATTACHED(mp, ip)) ||
898 (ip->i_ino == mp->m_sb.sb_uquotino) || 898 (ip->i_ino == mp->m_sb.sb_uquotino) ||
899 (ip->i_ino == mp->m_sb.sb_gquotino)) 899 (ip->i_ino == mp->m_sb.sb_gquotino))
900 return (0); 900 return 0;
901 901
902 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || 902 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
903 XFS_ISLOCKED_INODE_EXCL(ip)); 903 XFS_ISLOCKED_INODE_EXCL(ip));
@@ -984,7 +984,7 @@ xfs_qm_dqattach(
984 else 984 else
985 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 985 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
986#endif 986#endif
987 return (error); 987 return error;
988} 988}
989 989
990/* 990/*
@@ -1049,7 +1049,7 @@ xfs_qm_sync(
1049 */ 1049 */
1050 if (! XFS_IS_QUOTA_ON(mp)) { 1050 if (! XFS_IS_QUOTA_ON(mp)) {
1051 xfs_qm_mplist_unlock(mp); 1051 xfs_qm_mplist_unlock(mp);
1052 return (0); 1052 return 0;
1053 } 1053 }
1054 FOREACH_DQUOT_IN_MP(dqp, mp) { 1054 FOREACH_DQUOT_IN_MP(dqp, mp) {
1055 /* 1055 /*
@@ -1109,9 +1109,9 @@ xfs_qm_sync(
1109 error = xfs_qm_dqflush(dqp, flush_flags); 1109 error = xfs_qm_dqflush(dqp, flush_flags);
1110 xfs_dqunlock(dqp); 1110 xfs_dqunlock(dqp);
1111 if (error && XFS_FORCED_SHUTDOWN(mp)) 1111 if (error && XFS_FORCED_SHUTDOWN(mp))
1112 return(0); /* Need to prevent umount failure */ 1112 return 0; /* Need to prevent umount failure */
1113 else if (error) 1113 else if (error)
1114 return (error); 1114 return error;
1115 1115
1116 xfs_qm_mplist_lock(mp); 1116 xfs_qm_mplist_lock(mp);
1117 if (recl != XFS_QI_MPLRECLAIMS(mp)) { 1117 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
@@ -1124,7 +1124,7 @@ xfs_qm_sync(
1124 } 1124 }
1125 1125
1126 xfs_qm_mplist_unlock(mp); 1126 xfs_qm_mplist_unlock(mp);
1127 return (0); 1127 return 0;
1128} 1128}
1129 1129
1130 1130
@@ -1146,7 +1146,7 @@ xfs_qm_init_quotainfo(
1146 * Tell XQM that we exist as soon as possible. 1146 * Tell XQM that we exist as soon as possible.
1147 */ 1147 */
1148 if ((error = xfs_qm_hold_quotafs_ref(mp))) { 1148 if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1149 return (error); 1149 return error;
1150 } 1150 }
1151 1151
1152 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 1152 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
@@ -1158,7 +1158,7 @@ xfs_qm_init_quotainfo(
1158 if ((error = xfs_qm_init_quotainos(mp))) { 1158 if ((error = xfs_qm_init_quotainos(mp))) {
1159 kmem_free(qinf, sizeof(xfs_quotainfo_t)); 1159 kmem_free(qinf, sizeof(xfs_quotainfo_t));
1160 mp->m_quotainfo = NULL; 1160 mp->m_quotainfo = NULL;
1161 return (error); 1161 return error;
1162 } 1162 }
1163 1163
1164 spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin"); 1164 spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
@@ -1166,7 +1166,7 @@ xfs_qm_init_quotainfo(
1166 qinf->qi_dqreclaims = 0; 1166 qinf->qi_dqreclaims = 0;
1167 1167
1168 /* mutex used to serialize quotaoffs */ 1168 /* mutex used to serialize quotaoffs */
1169 mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff"); 1169 mutex_init(&qinf->qi_quotaofflock);
1170 1170
1171 /* Precalc some constants */ 1171 /* Precalc some constants */
1172 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 1172 qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -1232,7 +1232,7 @@ xfs_qm_init_quotainfo(
1232 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 1232 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1233 } 1233 }
1234 1234
1235 return (0); 1235 return 0;
1236} 1236}
1237 1237
1238 1238
@@ -1285,7 +1285,7 @@ xfs_qm_list_init(
1285 char *str, 1285 char *str,
1286 int n) 1286 int n)
1287{ 1287{
1288 mutex_init(&list->qh_lock, MUTEX_DEFAULT, str); 1288 mutex_init(&list->qh_lock);
1289 list->qh_next = NULL; 1289 list->qh_next = NULL;
1290 list->qh_version = 0; 1290 list->qh_version = 0;
1291 list->qh_nelems = 0; 1291 list->qh_nelems = 0;
@@ -1332,7 +1332,7 @@ xfs_qm_dqget_noattach(
1332 */ 1332 */
1333 ASSERT(error != ESRCH); 1333 ASSERT(error != ESRCH);
1334 ASSERT(error != ENOENT); 1334 ASSERT(error != ENOENT);
1335 return (error); 1335 return error;
1336 } 1336 }
1337 ASSERT(udqp); 1337 ASSERT(udqp);
1338 } 1338 }
@@ -1355,7 +1355,7 @@ xfs_qm_dqget_noattach(
1355 xfs_qm_dqrele(udqp); 1355 xfs_qm_dqrele(udqp);
1356 ASSERT(error != ESRCH); 1356 ASSERT(error != ESRCH);
1357 ASSERT(error != ENOENT); 1357 ASSERT(error != ENOENT);
1358 return (error); 1358 return error;
1359 } 1359 }
1360 ASSERT(gdqp); 1360 ASSERT(gdqp);
1361 1361
@@ -1376,7 +1376,7 @@ xfs_qm_dqget_noattach(
1376 if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp)); 1376 if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1377 if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp)); 1377 if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1378#endif 1378#endif
1379 return (0); 1379 return 0;
1380} 1380}
1381 1381
1382/* 1382/*
@@ -1392,26 +1392,28 @@ xfs_qm_qino_alloc(
1392{ 1392{
1393 xfs_trans_t *tp; 1393 xfs_trans_t *tp;
1394 int error; 1394 int error;
1395 unsigned long s; 1395 unsigned long s;
1396 cred_t zerocr; 1396 cred_t zerocr;
1397 xfs_inode_t zeroino;
1397 int committed; 1398 int committed;
1398 1399
1399 tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE); 1400 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1400 if ((error = xfs_trans_reserve(tp, 1401 if ((error = xfs_trans_reserve(tp,
1401 XFS_QM_QINOCREATE_SPACE_RES(mp), 1402 XFS_QM_QINOCREATE_SPACE_RES(mp),
1402 XFS_CREATE_LOG_RES(mp), 0, 1403 XFS_CREATE_LOG_RES(mp), 0,
1403 XFS_TRANS_PERM_LOG_RES, 1404 XFS_TRANS_PERM_LOG_RES,
1404 XFS_CREATE_LOG_COUNT))) { 1405 XFS_CREATE_LOG_COUNT))) {
1405 xfs_trans_cancel(tp, 0); 1406 xfs_trans_cancel(tp, 0);
1406 return (error); 1407 return error;
1407 } 1408 }
1408 memset(&zerocr, 0, sizeof(zerocr)); 1409 memset(&zerocr, 0, sizeof(zerocr));
1410 memset(&zeroino, 0, sizeof(zeroino));
1409 1411
1410 if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0, 1412 if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0,
1411 &zerocr, 0, 1, ip, &committed))) { 1413 &zerocr, 0, 1, ip, &committed))) {
1412 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 1414 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1413 XFS_TRANS_ABORT); 1415 XFS_TRANS_ABORT);
1414 return (error); 1416 return error;
1415 } 1417 }
1416 1418
1417 /* 1419 /*
@@ -1459,9 +1461,9 @@ xfs_qm_qino_alloc(
1459 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, 1461 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
1460 NULL))) { 1462 NULL))) {
1461 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); 1463 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1462 return (error); 1464 return error;
1463 } 1465 }
1464 return (0); 1466 return 0;
1465} 1467}
1466 1468
1467 1469
@@ -1506,7 +1508,7 @@ xfs_qm_reset_dqcounts(
1506 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); 1508 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1507 } 1509 }
1508 1510
1509 return (0); 1511 return 0;
1510} 1512}
1511 1513
1512STATIC int 1514STATIC int
@@ -1555,7 +1557,7 @@ xfs_qm_dqiter_bufs(
1555 bno++; 1557 bno++;
1556 firstid += XFS_QM_DQPERBLK(mp); 1558 firstid += XFS_QM_DQPERBLK(mp);
1557 } 1559 }
1558 return (error); 1560 return error;
1559} 1561}
1560 1562
1561/* 1563/*
@@ -1584,7 +1586,7 @@ xfs_qm_dqiterate(
1584 * happens only at mount time which is single threaded. 1586 * happens only at mount time which is single threaded.
1585 */ 1587 */
1586 if (qip->i_d.di_nblocks == 0) 1588 if (qip->i_d.di_nblocks == 0)
1587 return (0); 1589 return 0;
1588 1590
1589 map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); 1591 map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1590 1592
@@ -1653,7 +1655,7 @@ xfs_qm_dqiterate(
1653 1655
1654 kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map)); 1656 kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
1655 1657
1656 return (error); 1658 return error;
1657} 1659}
1658 1660
1659/* 1661/*
@@ -1713,7 +1715,7 @@ xfs_qm_get_rtblks(
1713 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1715 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1714 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1716 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1715 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) 1717 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1716 return (error); 1718 return error;
1717 } 1719 }
1718 rtblks = 0; 1720 rtblks = 0;
1719 nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 1721 nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
@@ -1721,7 +1723,7 @@ xfs_qm_get_rtblks(
1721 for (ep = base; ep < &base[nextents]; ep++) 1723 for (ep = base; ep < &base[nextents]; ep++)
1722 rtblks += xfs_bmbt_get_blockcount(ep); 1724 rtblks += xfs_bmbt_get_blockcount(ep);
1723 *O_rtblks = (xfs_qcnt_t)rtblks; 1725 *O_rtblks = (xfs_qcnt_t)rtblks;
1724 return (0); 1726 return 0;
1725} 1727}
1726 1728
1727/* 1729/*
@@ -1765,7 +1767,7 @@ xfs_qm_dqusage_adjust(
1765 */ 1767 */
1766 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { 1768 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1767 *res = BULKSTAT_RV_NOTHING; 1769 *res = BULKSTAT_RV_NOTHING;
1768 return (error); 1770 return error;
1769 } 1771 }
1770 1772
1771 if (ip->i_d.di_mode == 0) { 1773 if (ip->i_d.di_mode == 0) {
@@ -1783,7 +1785,7 @@ xfs_qm_dqusage_adjust(
1783 if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { 1785 if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1784 xfs_iput(ip, XFS_ILOCK_EXCL); 1786 xfs_iput(ip, XFS_ILOCK_EXCL);
1785 *res = BULKSTAT_RV_GIVEUP; 1787 *res = BULKSTAT_RV_GIVEUP;
1786 return (error); 1788 return error;
1787 } 1789 }
1788 1790
1789 rtblks = 0; 1791 rtblks = 0;
@@ -1800,7 +1802,7 @@ xfs_qm_dqusage_adjust(
1800 if (gdqp) 1802 if (gdqp)
1801 xfs_qm_dqput(gdqp); 1803 xfs_qm_dqput(gdqp);
1802 *res = BULKSTAT_RV_GIVEUP; 1804 *res = BULKSTAT_RV_GIVEUP;
1803 return (error); 1805 return error;
1804 } 1806 }
1805 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; 1807 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1806 } 1808 }
@@ -1845,7 +1847,7 @@ xfs_qm_dqusage_adjust(
1845 * Goto next inode. 1847 * Goto next inode.
1846 */ 1848 */
1847 *res = BULKSTAT_RV_DIDONE; 1849 *res = BULKSTAT_RV_DIDONE;
1848 return (0); 1850 return 0;
1849} 1851}
1850 1852
1851/* 1853/*
@@ -1918,9 +1920,7 @@ xfs_qm_quotacheck(
1918 * at this point (because we intentionally didn't in dqget_noattach). 1920 * at this point (because we intentionally didn't in dqget_noattach).
1919 */ 1921 */
1920 if (error) { 1922 if (error) {
1921 xfs_qm_dqpurge_all(mp, 1923 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1922 XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
1923 XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
1924 goto error_return; 1924 goto error_return;
1925 } 1925 }
1926 /* 1926 /*
@@ -2041,7 +2041,7 @@ xfs_qm_init_quotainos(
2041 XFS_QI_UQIP(mp) = uip; 2041 XFS_QI_UQIP(mp) = uip;
2042 XFS_QI_GQIP(mp) = gip; 2042 XFS_QI_GQIP(mp) = gip;
2043 2043
2044 return (0); 2044 return 0;
2045} 2045}
2046 2046
2047 2047
@@ -2062,7 +2062,7 @@ xfs_qm_shake_freelist(
2062 int nflushes; 2062 int nflushes;
2063 2063
2064 if (howmany <= 0) 2064 if (howmany <= 0)
2065 return (0); 2065 return 0;
2066 2066
2067 nreclaimed = 0; 2067 nreclaimed = 0;
2068 restarts = 0; 2068 restarts = 0;
@@ -2088,7 +2088,7 @@ xfs_qm_shake_freelist(
2088 xfs_dqunlock(dqp); 2088 xfs_dqunlock(dqp);
2089 xfs_qm_freelist_unlock(xfs_Gqm); 2089 xfs_qm_freelist_unlock(xfs_Gqm);
2090 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2090 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2091 return (nreclaimed); 2091 return nreclaimed;
2092 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 2092 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2093 goto tryagain; 2093 goto tryagain;
2094 } 2094 }
@@ -2163,7 +2163,7 @@ xfs_qm_shake_freelist(
2163 XFS_DQ_HASH_UNLOCK(hash); 2163 XFS_DQ_HASH_UNLOCK(hash);
2164 xfs_qm_freelist_unlock(xfs_Gqm); 2164 xfs_qm_freelist_unlock(xfs_Gqm);
2165 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2165 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2166 return (nreclaimed); 2166 return nreclaimed;
2167 goto tryagain; 2167 goto tryagain;
2168 } 2168 }
2169 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING"); 2169 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
@@ -2188,7 +2188,7 @@ xfs_qm_shake_freelist(
2188 dqp = nextdqp; 2188 dqp = nextdqp;
2189 } 2189 }
2190 xfs_qm_freelist_unlock(xfs_Gqm); 2190 xfs_qm_freelist_unlock(xfs_Gqm);
2191 return (nreclaimed); 2191 return nreclaimed;
2192} 2192}
2193 2193
2194 2194
@@ -2202,9 +2202,9 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2202 int ndqused, nfree, n; 2202 int ndqused, nfree, n;
2203 2203
2204 if (!kmem_shake_allow(gfp_mask)) 2204 if (!kmem_shake_allow(gfp_mask))
2205 return (0); 2205 return 0;
2206 if (!xfs_Gqm) 2206 if (!xfs_Gqm)
2207 return (0); 2207 return 0;
2208 2208
2209 nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */ 2209 nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
2210 /* incore dquots in all f/s's */ 2210 /* incore dquots in all f/s's */
@@ -2213,7 +2213,7 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2213 ASSERT(ndqused >= 0); 2213 ASSERT(ndqused >= 0);
2214 2214
2215 if (nfree <= ndqused && nfree < ndquot) 2215 if (nfree <= ndqused && nfree < ndquot)
2216 return (0); 2216 return 0;
2217 2217
2218 ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ 2218 ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
2219 n = nfree - ndqused - ndquot; /* # over target */ 2219 n = nfree - ndqused - ndquot; /* # over target */
@@ -2257,7 +2257,7 @@ xfs_qm_dqreclaim_one(void)
2257 xfs_dqunlock(dqp); 2257 xfs_dqunlock(dqp);
2258 xfs_qm_freelist_unlock(xfs_Gqm); 2258 xfs_qm_freelist_unlock(xfs_Gqm);
2259 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2259 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2260 return (NULL); 2260 return NULL;
2261 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 2261 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2262 goto startagain; 2262 goto startagain;
2263 } 2263 }
@@ -2333,7 +2333,7 @@ xfs_qm_dqreclaim_one(void)
2333 } 2333 }
2334 2334
2335 xfs_qm_freelist_unlock(xfs_Gqm); 2335 xfs_qm_freelist_unlock(xfs_Gqm);
2336 return (dqpout); 2336 return dqpout;
2337} 2337}
2338 2338
2339 2339
@@ -2369,7 +2369,7 @@ xfs_qm_dqalloc_incore(
2369 */ 2369 */
2370 memset(&dqp->q_core, 0, sizeof(dqp->q_core)); 2370 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2371 *O_dqpp = dqp; 2371 *O_dqpp = dqp;
2372 return (B_FALSE); 2372 return B_FALSE;
2373 } 2373 }
2374 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); 2374 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2375 } 2375 }
@@ -2382,7 +2382,7 @@ xfs_qm_dqalloc_incore(
2382 *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); 2382 *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2383 atomic_inc(&xfs_Gqm->qm_totaldquots); 2383 atomic_inc(&xfs_Gqm->qm_totaldquots);
2384 2384
2385 return (B_TRUE); 2385 return B_TRUE;
2386} 2386}
2387 2387
2388 2388
@@ -2407,13 +2407,13 @@ xfs_qm_write_sb_changes(
2407 0, 2407 0,
2408 XFS_DEFAULT_LOG_COUNT))) { 2408 XFS_DEFAULT_LOG_COUNT))) {
2409 xfs_trans_cancel(tp, 0); 2409 xfs_trans_cancel(tp, 0);
2410 return (error); 2410 return error;
2411 } 2411 }
2412 2412
2413 xfs_mod_sb(tp, flags); 2413 xfs_mod_sb(tp, flags);
2414 (void) xfs_trans_commit(tp, 0, NULL); 2414 (void) xfs_trans_commit(tp, 0, NULL);
2415 2415
2416 return (0); 2416 return 0;
2417} 2417}
2418 2418
2419 2419
@@ -2463,7 +2463,7 @@ xfs_qm_vop_dqalloc(
2463 if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC | 2463 if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
2464 XFS_QMOPT_ILOCKED))) { 2464 XFS_QMOPT_ILOCKED))) {
2465 xfs_iunlock(ip, lockflags); 2465 xfs_iunlock(ip, lockflags);
2466 return (error); 2466 return error;
2467 } 2467 }
2468 } 2468 }
2469 2469
@@ -2486,7 +2486,7 @@ xfs_qm_vop_dqalloc(
2486 XFS_QMOPT_DOWARN, 2486 XFS_QMOPT_DOWARN,
2487 &uq))) { 2487 &uq))) {
2488 ASSERT(error != ENOENT); 2488 ASSERT(error != ENOENT);
2489 return (error); 2489 return error;
2490 } 2490 }
2491 /* 2491 /*
2492 * Get the ilock in the right order. 2492 * Get the ilock in the right order.
@@ -2517,7 +2517,7 @@ xfs_qm_vop_dqalloc(
2517 if (uq) 2517 if (uq)
2518 xfs_qm_dqrele(uq); 2518 xfs_qm_dqrele(uq);
2519 ASSERT(error != ENOENT); 2519 ASSERT(error != ENOENT);
2520 return (error); 2520 return error;
2521 } 2521 }
2522 xfs_dqunlock(gq); 2522 xfs_dqunlock(gq);
2523 lockflags = XFS_ILOCK_SHARED; 2523 lockflags = XFS_ILOCK_SHARED;
@@ -2565,7 +2565,7 @@ xfs_qm_vop_dqalloc(
2565 *O_gdqpp = gq; 2565 *O_gdqpp = gq;
2566 else if (gq) 2566 else if (gq)
2567 xfs_qm_dqrele(gq); 2567 xfs_qm_dqrele(gq);
2568 return (0); 2568 return 0;
2569} 2569}
2570 2570
2571/* 2571/*
@@ -2608,7 +2608,7 @@ xfs_qm_vop_chown(
2608 xfs_dqunlock(newdq); 2608 xfs_dqunlock(newdq);
2609 *IO_olddq = newdq; 2609 *IO_olddq = newdq;
2610 2610
2611 return (prevdq); 2611 return prevdq;
2612} 2612}
2613 2613
2614/* 2614/*
@@ -2702,12 +2702,12 @@ xfs_qm_vop_rename_dqattach(
2702 ip = i_tab[0]; 2702 ip = i_tab[0];
2703 2703
2704 if (! XFS_IS_QUOTA_ON(ip->i_mount)) 2704 if (! XFS_IS_QUOTA_ON(ip->i_mount))
2705 return (0); 2705 return 0;
2706 2706
2707 if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { 2707 if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2708 error = xfs_qm_dqattach(ip, 0); 2708 error = xfs_qm_dqattach(ip, 0);
2709 if (error) 2709 if (error)
2710 return (error); 2710 return error;
2711 } 2711 }
2712 for (i = 1; (i < 4 && i_tab[i]); i++) { 2712 for (i = 1; (i < 4 && i_tab[i]); i++) {
2713 /* 2713 /*
@@ -2717,11 +2717,11 @@ xfs_qm_vop_rename_dqattach(
2717 if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) { 2717 if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2718 error = xfs_qm_dqattach(ip, 0); 2718 error = xfs_qm_dqattach(ip, 0);
2719 if (error) 2719 if (error)
2720 return (error); 2720 return error;
2721 } 2721 }
2722 } 2722 }
2723 } 2723 }
2724 return (0); 2724 return 0;
2725} 2725}
2726 2726
2727void 2727void
@@ -2743,6 +2743,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2743 xfs_dqunlock(udqp); 2743 xfs_dqunlock(udqp);
2744 ASSERT(ip->i_udquot == NULL); 2744 ASSERT(ip->i_udquot == NULL);
2745 ip->i_udquot = udqp; 2745 ip->i_udquot = udqp;
2746 ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
2746 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2747 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2747 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2748 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2748 } 2749 }
@@ -2752,7 +2753,10 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2752 xfs_dqunlock(gdqp); 2753 xfs_dqunlock(gdqp);
2753 ASSERT(ip->i_gdquot == NULL); 2754 ASSERT(ip->i_gdquot == NULL);
2754 ip->i_gdquot = gdqp; 2755 ip->i_gdquot = gdqp;
2755 ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); 2756 ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
2757 ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
2758 ip->i_d.di_gid : ip->i_d.di_projid) ==
2759 be32_to_cpu(gdqp->q_core.d_id));
2756 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2760 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2757 } 2761 }
2758} 2762}
@@ -2762,7 +2766,7 @@ STATIC void
2762xfs_qm_freelist_init(xfs_frlist_t *ql) 2766xfs_qm_freelist_init(xfs_frlist_t *ql)
2763{ 2767{
2764 ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql; 2768 ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2765 mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf"); 2769 mutex_init(&ql->qh_lock);
2766 ql->qh_version = 0; 2770 ql->qh_version = 0;
2767 ql->qh_nelems = 0; 2771 ql->qh_nelems = 0;
2768} 2772}
@@ -2772,7 +2776,7 @@ xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2772{ 2776{
2773 xfs_dquot_t *dqp, *nextdqp; 2777 xfs_dquot_t *dqp, *nextdqp;
2774 2778
2775 mutex_lock(&ql->qh_lock, PINOD); 2779 mutex_lock(&ql->qh_lock);
2776 for (dqp = ql->qh_next; 2780 for (dqp = ql->qh_next;
2777 dqp != (xfs_dquot_t *)ql; ) { 2781 dqp != (xfs_dquot_t *)ql; ) {
2778 xfs_dqlock(dqp); 2782 xfs_dqlock(dqp);
@@ -2830,7 +2834,7 @@ xfs_qm_dqhashlock_nowait(
2830 int locked; 2834 int locked;
2831 2835
2832 locked = mutex_trylock(&((dqp)->q_hash->qh_lock)); 2836 locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
2833 return (locked); 2837 return locked;
2834} 2838}
2835 2839
2836int 2840int
@@ -2840,7 +2844,7 @@ xfs_qm_freelist_lock_nowait(
2840 int locked; 2844 int locked;
2841 2845
2842 locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock)); 2846 locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
2843 return (locked); 2847 return locked;
2844} 2848}
2845 2849
2846STATIC int 2850STATIC int
@@ -2851,5 +2855,5 @@ xfs_qm_mplist_nowait(
2851 2855
2852 ASSERT(mp->m_quotainfo); 2856 ASSERT(mp->m_quotainfo);
2853 locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp))); 2857 locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
2854 return (locked); 2858 return locked;
2855} 2859}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 12da259f2fcb..4568deb6da86 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct {
165#define XFS_QM_IWARNLIMIT 5 165#define XFS_QM_IWARNLIMIT 5
166#define XFS_QM_RTBWARNLIMIT 5 166#define XFS_QM_RTBWARNLIMIT 5
167 167
168#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock, PINOD)) 168#define XFS_QM_LOCK(xqm) (mutex_lock(&xqm##_lock))
169#define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock)) 169#define XFS_QM_UNLOCK(xqm) (mutex_unlock(&xqm##_lock))
170#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++) 170#define XFS_QM_HOLD(xqm) ((xqm)->qm_nrefs++)
171#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) 171#define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--)
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index d9d2993de435..90402a1c3983 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -363,7 +363,7 @@ xfs_qm_init(void)
363 KERN_INFO "SGI XFS Quota Management subsystem\n"; 363 KERN_INFO "SGI XFS Quota Management subsystem\n";
364 364
365 printk(message); 365 printk(message);
366 mutex_init(&xfs_Gqm_lock, MUTEX_DEFAULT, "xfs_qmlock"); 366 mutex_init(&xfs_Gqm_lock);
367 vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs); 367 vfs_bhv_set_custom(&xfs_qmops, &xfs_qmcore_xfs);
368 xfs_qm_init_procfs(); 368 xfs_qm_init_procfs();
369} 369}
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 24690e1af659..676884394aae 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -15,6 +15,9 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
19#include <linux/capability.h>
20
18#include "xfs.h" 21#include "xfs.h"
19#include "xfs_fs.h" 22#include "xfs_fs.h"
20#include "xfs_bit.h" 23#include "xfs_bit.h"
@@ -233,7 +236,7 @@ xfs_qm_scall_quotaoff(
233 */ 236 */
234 ASSERT(mp->m_quotainfo); 237 ASSERT(mp->m_quotainfo);
235 if (mp->m_quotainfo) 238 if (mp->m_quotainfo)
236 mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD); 239 mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
237 240
238 ASSERT(mp->m_quotainfo); 241 ASSERT(mp->m_quotainfo);
239 242
@@ -508,7 +511,7 @@ xfs_qm_scall_quotaon(
508 /* 511 /*
509 * Switch on quota enforcement in core. 512 * Switch on quota enforcement in core.
510 */ 513 */
511 mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD); 514 mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
512 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); 515 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
513 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 516 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
514 517
@@ -617,7 +620,7 @@ xfs_qm_scall_setqlim(
617 * a quotaoff from happening). (XXXThis doesn't currently happen 620 * a quotaoff from happening). (XXXThis doesn't currently happen
618 * because we take the vfslock before calling xfs_qm_sysent). 621 * because we take the vfslock before calling xfs_qm_sysent).
619 */ 622 */
620 mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD); 623 mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
621 624
622 /* 625 /*
623 * Get the dquot (locked), and join it to the transaction. 626 * Get the dquot (locked), and join it to the transaction.
@@ -1426,7 +1429,7 @@ xfs_qm_internalqcheck(
1426 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1429 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1427 XFS_bflush(mp->m_ddev_targp); 1430 XFS_bflush(mp->m_ddev_targp);
1428 1431
1429 mutex_lock(&qcheck_lock, PINOD); 1432 mutex_lock(&qcheck_lock);
1430 /* There should be absolutely no quota activity while this 1433 /* There should be absolutely no quota activity while this
1431 is going on. */ 1434 is going on. */
1432 qmtest_udqtab = kmem_zalloc(qmtest_hashmask * 1435 qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 7a9f3beb818c..b7ddd04aae32 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -51,7 +51,7 @@
51#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next) 51#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next)
52#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems) 52#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
53 53
54#define XQMLCK(h) (mutex_lock(&((h)->qh_lock), PINOD)) 54#define XQMLCK(h) (mutex_lock(&((h)->qh_lock)))
55#define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock))) 55#define XQMUNLCK(h) (mutex_unlock(&((h)->qh_lock)))
56#ifdef DEBUG 56#ifdef DEBUG
57struct xfs_dqhash; 57struct xfs_dqhash;
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index bb6dc91ea261..b08b3d9345b7 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -27,45 +27,12 @@ static DEFINE_SPINLOCK(xfs_err_lock);
27/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */ 27/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
28#define XFS_MAX_ERR_LEVEL 7 28#define XFS_MAX_ERR_LEVEL 7
29#define XFS_ERR_MASK ((1 << 3) - 1) 29#define XFS_ERR_MASK ((1 << 3) - 1)
30static char *err_level[XFS_MAX_ERR_LEVEL+1] = 30static const char * const err_level[XFS_MAX_ERR_LEVEL+1] =
31 {KERN_EMERG, KERN_ALERT, KERN_CRIT, 31 {KERN_EMERG, KERN_ALERT, KERN_CRIT,
32 KERN_ERR, KERN_WARNING, KERN_NOTICE, 32 KERN_ERR, KERN_WARNING, KERN_NOTICE,
33 KERN_INFO, KERN_DEBUG}; 33 KERN_INFO, KERN_DEBUG};
34 34
35void 35void
36assfail(char *a, char *f, int l)
37{
38 printk("XFS assertion failed: %s, file: %s, line: %d\n", a, f, l);
39 BUG();
40}
41
42#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
43
44unsigned long
45random(void)
46{
47 static unsigned long RandomValue = 1;
48 /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
49 register long rv = RandomValue;
50 register long lo;
51 register long hi;
52
53 hi = rv / 127773;
54 lo = rv % 127773;
55 rv = 16807 * lo - 2836 * hi;
56 if( rv <= 0 ) rv += 2147483647;
57 return( RandomValue = rv );
58}
59
60int
61get_thread_id(void)
62{
63 return current->pid;
64}
65
66#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
67
68void
69cmn_err(register int level, char *fmt, ...) 36cmn_err(register int level, char *fmt, ...)
70{ 37{
71 char *fp = fmt; 38 char *fp = fmt;
@@ -90,7 +57,6 @@ cmn_err(register int level, char *fmt, ...)
90 BUG(); 57 BUG();
91} 58}
92 59
93
94void 60void
95icmn_err(register int level, char *fmt, va_list ap) 61icmn_err(register int level, char *fmt, va_list ap)
96{ 62{
@@ -109,3 +75,27 @@ icmn_err(register int level, char *fmt, va_list ap)
109 if (level == CE_PANIC) 75 if (level == CE_PANIC)
110 BUG(); 76 BUG();
111} 77}
78
79void
80assfail(char *expr, char *file, int line)
81{
82 printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
83 BUG();
84}
85
86#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
87unsigned long random(void)
88{
89 static unsigned long RandomValue = 1;
90 /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
91 register long rv = RandomValue;
92 register long lo;
93 register long hi;
94
95 hi = rv / 127773;
96 lo = rv % 127773;
97 rv = 16807 * lo - 2836 * hi;
98 if (rv <= 0) rv += 2147483647;
99 return RandomValue = rv;
100}
101#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index aff558664c32..e3bf58112e7e 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -31,24 +31,23 @@ extern void icmn_err(int, char *, va_list)
31 __attribute__ ((format (printf, 2, 0))); 31 __attribute__ ((format (printf, 2, 0)));
32extern void cmn_err(int, char *, ...) 32extern void cmn_err(int, char *, ...)
33 __attribute__ ((format (printf, 2, 3))); 33 __attribute__ ((format (printf, 2, 3)));
34extern void assfail(char *expr, char *f, int l);
34 35
35#ifndef STATIC 36#define prdev(fmt,targ,args...) \
36# define STATIC static 37 printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
37#endif
38 38
39#ifdef DEBUG 39#define ASSERT_ALWAYS(expr) \
40# define ASSERT(EX) ((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__)) 40 (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
41#else
42# define ASSERT(x) ((void)0)
43#endif
44 41
45extern void assfail(char *, char *, int); 42#ifndef DEBUG
46#ifdef DEBUG 43# define ASSERT(expr) ((void)0)
44#else
45# define ASSERT(expr) ASSERT_ALWAYS(expr)
47extern unsigned long random(void); 46extern unsigned long random(void);
48extern int get_thread_id(void);
49#endif 47#endif
50 48
51#define ASSERT_ALWAYS(EX) ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__)) 49#ifndef STATIC
52#define debug_stop_all_cpus(param) /* param is "cpumask_t *" */ 50# define STATIC static
51#endif
53 52
54#endif /* __XFS_SUPPORT_DEBUG_H__ */ 53#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 70ce40914c8a..a3d565a67734 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -24,9 +24,19 @@ static uuid_t *uuid_table;
24void 24void
25uuid_init(void) 25uuid_init(void)
26{ 26{
27 mutex_init(&uuid_monitor, MUTEX_DEFAULT, "uuid_monitor"); 27 mutex_init(&uuid_monitor);
28} 28}
29 29
30
31/* IRIX interpretation of an uuid_t */
32typedef struct {
33 __be32 uu_timelow;
34 __be16 uu_timemid;
35 __be16 uu_timehi;
36 __be16 uu_clockseq;
37 __be16 uu_node[3];
38} xfs_uu_t;
39
30/* 40/*
31 * uuid_getnodeuniq - obtain the node unique fields of a UUID. 41 * uuid_getnodeuniq - obtain the node unique fields of a UUID.
32 * 42 *
@@ -36,16 +46,11 @@ uuid_init(void)
36void 46void
37uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) 47uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
38{ 48{
39 char *uu = (char *)uuid; 49 xfs_uu_t *uup = (xfs_uu_t *)uuid;
40
41 /* on IRIX, this function assumes big-endian fields within
42 * the uuid, so we use INT_GET to get the same result on
43 * little-endian systems
44 */
45 50
46 fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) + 51 fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
47 INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT); 52 be16_to_cpu(uup->uu_timemid);
48 fsid[1] = INT_GET(*(u_int32_t*)(uu ), ARCH_CONVERT); 53 fsid[1] = be16_to_cpu(uup->uu_timelow);
49} 54}
50 55
51void 56void
@@ -94,7 +99,7 @@ uuid_table_insert(uuid_t *uuid)
94{ 99{
95 int i, hole; 100 int i, hole;
96 101
97 mutex_lock(&uuid_monitor, PVFS); 102 mutex_lock(&uuid_monitor);
98 for (i = 0, hole = -1; i < uuid_table_size; i++) { 103 for (i = 0, hole = -1; i < uuid_table_size; i++) {
99 if (uuid_is_nil(&uuid_table[i])) { 104 if (uuid_is_nil(&uuid_table[i])) {
100 hole = i; 105 hole = i;
@@ -122,7 +127,7 @@ uuid_table_remove(uuid_t *uuid)
122{ 127{
123 int i; 128 int i;
124 129
125 mutex_lock(&uuid_monitor, PVFS); 130 mutex_lock(&uuid_monitor);
126 for (i = 0; i < uuid_table_size; i++) { 131 for (i = 0; i < uuid_table_size; i++) {
127 if (uuid_is_nil(&uuid_table[i])) 132 if (uuid_is_nil(&uuid_table[i]))
128 continue; 133 continue;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index cc9c91b9e771..4ff0f4e41c61 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -36,6 +36,7 @@
36#include "xfs_mac.h" 36#include "xfs_mac.h"
37#include "xfs_attr.h" 37#include "xfs_attr.h"
38 38
39#include <linux/capability.h>
39#include <linux/posix_acl_xattr.h> 40#include <linux/posix_acl_xattr.h>
40 41
41STATIC int xfs_acl_setmode(vnode_t *, xfs_acl_t *, int *); 42STATIC int xfs_acl_setmode(vnode_t *, xfs_acl_t *, int *);
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 68e5051d8e24..c4836890b726 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -40,6 +40,22 @@
40#undef XFS_NATIVE_HOST 40#undef XFS_NATIVE_HOST
41#endif 41#endif
42 42
43#ifdef XFS_NATIVE_HOST
44#define cpu_to_be16(val) ((__be16)(val))
45#define cpu_to_be32(val) ((__be32)(val))
46#define cpu_to_be64(val) ((__be64)(val))
47#define be16_to_cpu(val) ((__uint16_t)(val))
48#define be32_to_cpu(val) ((__uint32_t)(val))
49#define be64_to_cpu(val) ((__uint64_t)(val))
50#else
51#define cpu_to_be16(val) (__swab16((__uint16_t)(val)))
52#define cpu_to_be32(val) (__swab32((__uint32_t)(val)))
53#define cpu_to_be64(val) (__swab64((__uint64_t)(val)))
54#define be16_to_cpu(val) (__swab16((__be16)(val)))
55#define be32_to_cpu(val) (__swab32((__be32)(val)))
56#define be64_to_cpu(val) (__swab64((__be64)(val)))
57#endif
58
43#endif /* __KERNEL__ */ 59#endif /* __KERNEL__ */
44 60
45/* do we need conversion? */ 61/* do we need conversion? */
@@ -186,7 +202,7 @@ static inline void be64_add(__be64 *a, __s64 b)
186 */ 202 */
187 203
188#define XFS_GET_DIR_INO4(di) \ 204#define XFS_GET_DIR_INO4(di) \
189 (((u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) 205 (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
190 206
191#define XFS_PUT_DIR_INO4(from, di) \ 207#define XFS_PUT_DIR_INO4(from, di) \
192do { \ 208do { \
@@ -197,9 +213,9 @@ do { \
197} while (0) 213} while (0)
198 214
199#define XFS_DI_HI(di) \ 215#define XFS_DI_HI(di) \
200 (((u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3])) 216 (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
201#define XFS_DI_LO(di) \ 217#define XFS_DI_LO(di) \
202 (((u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7])) 218 (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
203 219
204#define XFS_GET_DIR_INO8(di) \ 220#define XFS_GET_DIR_INO8(di) \
205 (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \ 221 (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5484eeb460c8..e5e91e9c7e89 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -15,6 +15,9 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
19#include <linux/capability.h>
20
18#include "xfs.h" 21#include "xfs.h"
19#include "xfs_fs.h" 22#include "xfs_fs.h"
20#include "xfs_types.h" 23#include "xfs_types.h"
@@ -117,11 +120,6 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
117 ip->i_d.di_anextents == 0)) 120 ip->i_d.di_anextents == 0))
118 return(ENOATTR); 121 return(ENOATTR);
119 122
120 if (!(flags & (ATTR_KERNACCESS|ATTR_SECURE))) {
121 if ((error = xfs_iaccess(ip, S_IRUSR, cred)))
122 return(XFS_ERROR(error));
123 }
124
125 /* 123 /*
126 * Fill in the arg structure for this request. 124 * Fill in the arg structure for this request.
127 */ 125 */
@@ -425,7 +423,7 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
425 struct cred *cred) 423 struct cred *cred)
426{ 424{
427 xfs_inode_t *dp; 425 xfs_inode_t *dp;
428 int namelen, error; 426 int namelen;
429 427
430 namelen = strlen(name); 428 namelen = strlen(name);
431 if (namelen >= MAXNAMELEN) 429 if (namelen >= MAXNAMELEN)
@@ -437,14 +435,6 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
437 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 435 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
438 return (EIO); 436 return (EIO);
439 437
440 xfs_ilock(dp, XFS_ILOCK_SHARED);
441 if (!(flags & ATTR_SECURE) &&
442 (error = xfs_iaccess(dp, S_IWUSR, cred))) {
443 xfs_iunlock(dp, XFS_ILOCK_SHARED);
444 return(XFS_ERROR(error));
445 }
446 xfs_iunlock(dp, XFS_ILOCK_SHARED);
447
448 return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags); 438 return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
449} 439}
450 440
@@ -579,7 +569,7 @@ int
579xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred) 569xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
580{ 570{
581 xfs_inode_t *dp; 571 xfs_inode_t *dp;
582 int namelen, error; 572 int namelen;
583 573
584 namelen = strlen(name); 574 namelen = strlen(name);
585 if (namelen >= MAXNAMELEN) 575 if (namelen >= MAXNAMELEN)
@@ -592,11 +582,7 @@ xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
592 return (EIO); 582 return (EIO);
593 583
594 xfs_ilock(dp, XFS_ILOCK_SHARED); 584 xfs_ilock(dp, XFS_ILOCK_SHARED);
595 if (!(flags & ATTR_SECURE) && 585 if (XFS_IFORK_Q(dp) == 0 ||
596 (error = xfs_iaccess(dp, S_IWUSR, cred))) {
597 xfs_iunlock(dp, XFS_ILOCK_SHARED);
598 return(XFS_ERROR(error));
599 } else if (XFS_IFORK_Q(dp) == 0 ||
600 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 586 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
601 dp->i_d.di_anextents == 0)) { 587 dp->i_d.di_anextents == 0)) {
602 xfs_iunlock(dp, XFS_ILOCK_SHARED); 588 xfs_iunlock(dp, XFS_ILOCK_SHARED);
@@ -668,12 +654,6 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
668 return (EIO); 654 return (EIO);
669 655
670 xfs_ilock(dp, XFS_ILOCK_SHARED); 656 xfs_ilock(dp, XFS_ILOCK_SHARED);
671 if (!(flags & ATTR_SECURE) &&
672 (error = xfs_iaccess(dp, S_IRUSR, cred))) {
673 xfs_iunlock(dp, XFS_ILOCK_SHARED);
674 return(XFS_ERROR(error));
675 }
676
677 /* 657 /*
678 * Decide on what work routines to call based on the inode size. 658 * Decide on what work routines to call based on the inode size.
679 */ 659 */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 1c7421840c18..fe91eac4e2a7 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -128,7 +128,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
128 return (offset >= minforkoff) ? minforkoff : 0; 128 return (offset >= minforkoff) ? minforkoff : 0;
129 } 129 }
130 130
131 if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) { 131 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
132 if (bytes <= XFS_IFORK_ASIZE(dp)) 132 if (bytes <= XFS_IFORK_ASIZE(dp))
133 return mp->m_attroffset >> 3; 133 return mp->m_attroffset >> 3;
134 return 0; 134 return 0;
@@ -157,7 +157,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
157{ 157{
158 unsigned long s; 158 unsigned long s;
159 159
160 if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) && 160 if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) { 161 !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
162 s = XFS_SB_LOCK(mp); 162 s = XFS_SB_LOCK(mp);
163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) { 163 if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
@@ -311,7 +311,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
311 */ 311 */
312 totsize -= size; 312 totsize -= size;
313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname && 313 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
314 !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) { 314 (mp->m_flags & XFS_MOUNT_ATTR2)) {
315 /* 315 /*
316 * Last attribute now removed, revert to original 316 * Last attribute now removed, revert to original
317 * inode format making all literal area available 317 * inode format making all literal area available
@@ -330,7 +330,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); 330 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
331 ASSERT(dp->i_d.di_forkoff); 331 ASSERT(dp->i_d.di_forkoff);
332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname || 332 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
333 (mp->m_flags & XFS_MOUNT_COMPAT_ATTR)); 333 !(mp->m_flags & XFS_MOUNT_ATTR2));
334 dp->i_afp->if_ext_max = 334 dp->i_afp->if_ext_max =
335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); 335 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
336 dp->i_df.if_ext_max = 336 dp->i_df.if_ext_max =
@@ -739,7 +739,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
739 + name_loc->namelen 739 + name_loc->namelen
740 + INT_GET(name_loc->valuelen, ARCH_CONVERT); 740 + INT_GET(name_loc->valuelen, ARCH_CONVERT);
741 } 741 }
742 if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) && 742 if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
743 (bytes == sizeof(struct xfs_attr_sf_hdr))) 743 (bytes == sizeof(struct xfs_attr_sf_hdr)))
744 return(-1); 744 return(-1);
745 return(xfs_attr_shortform_bytesfit(dp, bytes)); 745 return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -778,7 +778,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
778 goto out; 778 goto out;
779 779
780 if (forkoff == -1) { 780 if (forkoff == -1) {
781 ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR)); 781 ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
782 782
783 /* 783 /*
784 * Last attribute was removed, revert to original 784 * Last attribute was removed, revert to original
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index f6143ff251a0..541e34109bb9 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -63,7 +63,7 @@ struct xfs_trans;
63 * the leaf_entry. The namespaces are independent only because we also look 63 * the leaf_entry. The namespaces are independent only because we also look
64 * at the namespace bit when we are looking for a matching attribute name. 64 * at the namespace bit when we are looking for a matching attribute name.
65 * 65 *
66 * We also store a "incomplete" bit in the leaf_entry. It shows that an 66 * We also store an "incomplete" bit in the leaf_entry. It shows that an
67 * attribute is in the middle of being created and should not be shown to 67 * attribute is in the middle of being created and should not be shown to
68 * the user if we crash during the time that the bit is set. We clear the 68 * the user if we crash during the time that the bit is set. We clear the
69 * bit when we have finished setting up the attribute. We do this because 69 * bit when we have finished setting up the attribute. We do this because
@@ -72,42 +72,48 @@ struct xfs_trans;
72 */ 72 */
73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ 73#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */
74 74
75typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
76 __uint16_t base; /* base of free region */
77 __uint16_t size; /* length of free region */
78} xfs_attr_leaf_map_t;
79
80typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
81 xfs_da_blkinfo_t info; /* block type, links, etc. */
82 __uint16_t count; /* count of active leaf_entry's */
83 __uint16_t usedbytes; /* num bytes of names/values stored */
84 __uint16_t firstused; /* first used byte in name area */
85 __uint8_t holes; /* != 0 if blk needs compaction */
86 __uint8_t pad1;
87 xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
88 /* N largest free regions */
89} xfs_attr_leaf_hdr_t;
90
91typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
92 xfs_dahash_t hashval; /* hash value of name */
93 __uint16_t nameidx; /* index into buffer of name/value */
94 __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
95 __uint8_t pad2; /* unused pad byte */
96} xfs_attr_leaf_entry_t;
97
98typedef struct xfs_attr_leaf_name_local {
99 __uint16_t valuelen; /* number of bytes in value */
100 __uint8_t namelen; /* length of name bytes */
101 __uint8_t nameval[1]; /* name/value bytes */
102} xfs_attr_leaf_name_local_t;
103
104typedef struct xfs_attr_leaf_name_remote {
105 xfs_dablk_t valueblk; /* block number of value bytes */
106 __uint32_t valuelen; /* number of bytes in value */
107 __uint8_t namelen; /* length of name bytes */
108 __uint8_t name[1]; /* name bytes */
109} xfs_attr_leaf_name_remote_t;
110
75typedef struct xfs_attr_leafblock { 111typedef struct xfs_attr_leafblock {
76 struct xfs_attr_leaf_hdr { /* constant-structure header block */ 112 xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
77 xfs_da_blkinfo_t info; /* block type, links, etc. */ 113 xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
78 __uint16_t count; /* count of active leaf_entry's */ 114 xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
79 __uint16_t usedbytes; /* num bytes of names/values stored */ 115 xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
80 __uint16_t firstused; /* first used byte in name area */
81 __uint8_t holes; /* != 0 if blk needs compaction */
82 __uint8_t pad1;
83 struct xfs_attr_leaf_map { /* RLE map of free bytes */
84 __uint16_t base; /* base of free region */
85 __uint16_t size; /* length of free region */
86 } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
87 } hdr;
88 struct xfs_attr_leaf_entry { /* sorted on key, not name */
89 xfs_dahash_t hashval; /* hash value of name */
90 __uint16_t nameidx; /* index into buffer of name/value */
91 __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
92 __uint8_t pad2; /* unused pad byte */
93 } entries[1]; /* variable sized array */
94 struct xfs_attr_leaf_name_local {
95 __uint16_t valuelen; /* number of bytes in value */
96 __uint8_t namelen; /* length of name bytes */
97 __uint8_t nameval[1]; /* name/value bytes */
98 } namelist; /* grows from bottom of buf */
99 struct xfs_attr_leaf_name_remote {
100 xfs_dablk_t valueblk; /* block number of value bytes */
101 __uint32_t valuelen; /* number of bytes in value */
102 __uint8_t namelen; /* length of name bytes */
103 __uint8_t name[1]; /* name bytes */
104 } valuelist; /* grows from bottom of buf */
105} xfs_attr_leafblock_t; 116} xfs_attr_leafblock_t;
106typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
107typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
108typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
109typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
110typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
111 117
112/* 118/*
113 * Flags used in the leaf_entry[i].flags field. 119 * Flags used in the leaf_entry[i].flags field.
@@ -150,7 +156,8 @@ xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
150 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)]; 156 (leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
151} 157}
152 158
153#define XFS_ATTR_LEAF_NAME(leafp,idx) xfs_attr_leaf_name(leafp,idx) 159#define XFS_ATTR_LEAF_NAME(leafp,idx) \
160 xfs_attr_leaf_name(leafp,idx)
154static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx) 161static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
155{ 162{
156 return (&((char *) 163 return (&((char *)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e415a4698e9c..70625e577c70 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
2146 return 0; /* keep gcc quite */ 2146 return 0; /* keep gcc quite */
2147} 2147}
2148 2148
2149/*
2150 * Adjust the size of the new extent based on di_extsize and rt extsize.
2151 */
2152STATIC int
2153xfs_bmap_extsize_align(
2154 xfs_mount_t *mp,
2155 xfs_bmbt_irec_t *gotp, /* next extent pointer */
2156 xfs_bmbt_irec_t *prevp, /* previous extent pointer */
2157 xfs_extlen_t extsz, /* align to this extent size */
2158 int rt, /* is this a realtime inode? */
2159 int eof, /* is extent at end-of-file? */
2160 int delay, /* creating delalloc extent? */
2161 int convert, /* overwriting unwritten extent? */
2162 xfs_fileoff_t *offp, /* in/out: aligned offset */
2163 xfs_extlen_t *lenp) /* in/out: aligned length */
2164{
2165 xfs_fileoff_t orig_off; /* original offset */
2166 xfs_extlen_t orig_alen; /* original length */
2167 xfs_fileoff_t orig_end; /* original off+len */
2168 xfs_fileoff_t nexto; /* next file offset */
2169 xfs_fileoff_t prevo; /* previous file offset */
2170 xfs_fileoff_t align_off; /* temp for offset */
2171 xfs_extlen_t align_alen; /* temp for length */
2172 xfs_extlen_t temp; /* temp for calculations */
2173
2174 if (convert)
2175 return 0;
2176
2177 orig_off = align_off = *offp;
2178 orig_alen = align_alen = *lenp;
2179 orig_end = orig_off + orig_alen;
2180
2181 /*
2182 * If this request overlaps an existing extent, then don't
2183 * attempt to perform any additional alignment.
2184 */
2185 if (!delay && !eof &&
2186 (orig_off >= gotp->br_startoff) &&
2187 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2188 return 0;
2189 }
2190
2191 /*
2192 * If the file offset is unaligned vs. the extent size
2193 * we need to align it. This will be possible unless
2194 * the file was previously written with a kernel that didn't
2195 * perform this alignment, or if a truncate shot us in the
2196 * foot.
2197 */
2198 temp = do_mod(orig_off, extsz);
2199 if (temp) {
2200 align_alen += temp;
2201 align_off -= temp;
2202 }
2203 /*
2204 * Same adjustment for the end of the requested area.
2205 */
2206 if ((temp = (align_alen % extsz))) {
2207 align_alen += extsz - temp;
2208 }
2209 /*
2210 * If the previous block overlaps with this proposed allocation
2211 * then move the start forward without adjusting the length.
2212 */
2213 if (prevp->br_startoff != NULLFILEOFF) {
2214 if (prevp->br_startblock == HOLESTARTBLOCK)
2215 prevo = prevp->br_startoff;
2216 else
2217 prevo = prevp->br_startoff + prevp->br_blockcount;
2218 } else
2219 prevo = 0;
2220 if (align_off != orig_off && align_off < prevo)
2221 align_off = prevo;
2222 /*
2223 * If the next block overlaps with this proposed allocation
2224 * then move the start back without adjusting the length,
2225 * but not before offset 0.
2226 * This may of course make the start overlap previous block,
2227 * and if we hit the offset 0 limit then the next block
2228 * can still overlap too.
2229 */
2230 if (!eof && gotp->br_startoff != NULLFILEOFF) {
2231 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2232 (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2233 nexto = gotp->br_startoff + gotp->br_blockcount;
2234 else
2235 nexto = gotp->br_startoff;
2236 } else
2237 nexto = NULLFILEOFF;
2238 if (!eof &&
2239 align_off + align_alen != orig_end &&
2240 align_off + align_alen > nexto)
2241 align_off = nexto > align_alen ? nexto - align_alen : 0;
2242 /*
2243 * If we're now overlapping the next or previous extent that
2244 * means we can't fit an extsz piece in this hole. Just move
2245 * the start forward to the first valid spot and set
2246 * the length so we hit the end.
2247 */
2248 if (align_off != orig_off && align_off < prevo)
2249 align_off = prevo;
2250 if (align_off + align_alen != orig_end &&
2251 align_off + align_alen > nexto &&
2252 nexto != NULLFILEOFF) {
2253 ASSERT(nexto > prevo);
2254 align_alen = nexto - align_off;
2255 }
2256
2257 /*
2258 * If realtime, and the result isn't a multiple of the realtime
2259 * extent size we need to remove blocks until it is.
2260 */
2261 if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
2262 /*
2263 * We're not covering the original request, or
2264 * we won't be able to once we fix the length.
2265 */
2266 if (orig_off < align_off ||
2267 orig_end > align_off + align_alen ||
2268 align_alen - temp < orig_alen)
2269 return XFS_ERROR(EINVAL);
2270 /*
2271 * Try to fix it by moving the start up.
2272 */
2273 if (align_off + temp <= orig_off) {
2274 align_alen -= temp;
2275 align_off += temp;
2276 }
2277 /*
2278 * Try to fix it by moving the end in.
2279 */
2280 else if (align_off + align_alen - temp >= orig_end)
2281 align_alen -= temp;
2282 /*
2283 * Set the start to the minimum then trim the length.
2284 */
2285 else {
2286 align_alen -= orig_off - align_off;
2287 align_off = orig_off;
2288 align_alen -= align_alen % mp->m_sb.sb_rextsize;
2289 }
2290 /*
2291 * Result doesn't cover the request, fail it.
2292 */
2293 if (orig_off < align_off || orig_end > align_off + align_alen)
2294 return XFS_ERROR(EINVAL);
2295 } else {
2296 ASSERT(orig_off >= align_off);
2297 ASSERT(orig_end <= align_off + align_alen);
2298 }
2299
2300#ifdef DEBUG
2301 if (!eof && gotp->br_startoff != NULLFILEOFF)
2302 ASSERT(align_off + align_alen <= gotp->br_startoff);
2303 if (prevp->br_startoff != NULLFILEOFF)
2304 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
2305#endif
2306
2307 *lenp = align_alen;
2308 *offp = align_off;
2309 return 0;
2310}
2311
2149#define XFS_ALLOC_GAP_UNITS 4 2312#define XFS_ALLOC_GAP_UNITS 4
2150 2313
2151/* 2314/*
2152 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 2315 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
2153 * It figures out where to ask the underlying allocator to put the new extent. 2316 * It figures out where to ask the underlying allocator to put the new extent.
2154 */ 2317 */
2155STATIC int /* error */ 2318STATIC int
2156xfs_bmap_alloc( 2319xfs_bmap_alloc(
2157 xfs_bmalloca_t *ap) /* bmap alloc argument struct */ 2320 xfs_bmalloca_t *ap) /* bmap alloc argument struct */
2158{ 2321{
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
2163 xfs_mount_t *mp; /* mount point structure */ 2326 xfs_mount_t *mp; /* mount point structure */
2164 int nullfb; /* true if ap->firstblock isn't set */ 2327 int nullfb; /* true if ap->firstblock isn't set */
2165 int rt; /* true if inode is realtime */ 2328 int rt; /* true if inode is realtime */
2166#ifdef __KERNEL__ 2329 xfs_extlen_t prod = 0; /* product factor for allocators */
2167 xfs_extlen_t prod=0; /* product factor for allocators */ 2330 xfs_extlen_t ralen = 0; /* realtime allocation length */
2168 xfs_extlen_t ralen=0; /* realtime allocation length */ 2331 xfs_extlen_t align; /* minimum allocation alignment */
2169#endif 2332 xfs_rtblock_t rtx;
2170 2333
2171#define ISVALID(x,y) \ 2334#define ISVALID(x,y) \
2172 (rt ? \ 2335 (rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
2182 nullfb = ap->firstblock == NULLFSBLOCK; 2345 nullfb = ap->firstblock == NULLFSBLOCK;
2183 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; 2346 rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
2184 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); 2347 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
2185#ifdef __KERNEL__
2186 if (rt) { 2348 if (rt) {
2187 xfs_extlen_t extsz; /* file extent size for rt */ 2349 align = ap->ip->i_d.di_extsize ?
2188 xfs_fileoff_t nexto; /* next file offset */ 2350 ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
2189 xfs_extlen_t orig_alen; /* original ap->alen */ 2351 /* Set prod to match the extent size */
2190 xfs_fileoff_t orig_end; /* original off+len */ 2352 prod = align / mp->m_sb.sb_rextsize;
2191 xfs_fileoff_t orig_off; /* original ap->off */ 2353
2192 xfs_extlen_t mod_off; /* modulus calculations */ 2354 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2193 xfs_fileoff_t prevo; /* previous file offset */ 2355 align, rt, ap->eof, 0,
2194 xfs_rtblock_t rtx; /* realtime extent number */ 2356 ap->conv, &ap->off, &ap->alen);
2195 xfs_extlen_t temp; /* temp for rt calculations */ 2357 if (error)
2196 2358 return error;
2197 /* 2359 ASSERT(ap->alen);
2198 * Set prod to match the realtime extent size.
2199 */
2200 if (!(extsz = ap->ip->i_d.di_extsize))
2201 extsz = mp->m_sb.sb_rextsize;
2202 prod = extsz / mp->m_sb.sb_rextsize;
2203 orig_off = ap->off;
2204 orig_alen = ap->alen;
2205 orig_end = orig_off + orig_alen;
2206 /*
2207 * If the file offset is unaligned vs. the extent size
2208 * we need to align it. This will be possible unless
2209 * the file was previously written with a kernel that didn't
2210 * perform this alignment.
2211 */
2212 mod_off = do_mod(orig_off, extsz);
2213 if (mod_off) {
2214 ap->alen += mod_off;
2215 ap->off -= mod_off;
2216 }
2217 /*
2218 * Same adjustment for the end of the requested area.
2219 */
2220 if ((temp = (ap->alen % extsz)))
2221 ap->alen += extsz - temp;
2222 /*
2223 * If the previous block overlaps with this proposed allocation
2224 * then move the start forward without adjusting the length.
2225 */
2226 prevo =
2227 ap->prevp->br_startoff == NULLFILEOFF ?
2228 0 :
2229 (ap->prevp->br_startoff +
2230 ap->prevp->br_blockcount);
2231 if (ap->off != orig_off && ap->off < prevo)
2232 ap->off = prevo;
2233 /*
2234 * If the next block overlaps with this proposed allocation
2235 * then move the start back without adjusting the length,
2236 * but not before offset 0.
2237 * This may of course make the start overlap previous block,
2238 * and if we hit the offset 0 limit then the next block
2239 * can still overlap too.
2240 */
2241 nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
2242 NULLFILEOFF : ap->gotp->br_startoff;
2243 if (!ap->eof &&
2244 ap->off + ap->alen != orig_end &&
2245 ap->off + ap->alen > nexto)
2246 ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
2247 /*
2248 * If we're now overlapping the next or previous extent that
2249 * means we can't fit an extsz piece in this hole. Just move
2250 * the start forward to the first valid spot and set
2251 * the length so we hit the end.
2252 */
2253 if ((ap->off != orig_off && ap->off < prevo) ||
2254 (ap->off + ap->alen != orig_end &&
2255 ap->off + ap->alen > nexto)) {
2256 ap->off = prevo;
2257 ap->alen = nexto - prevo;
2258 }
2259 /*
2260 * If the result isn't a multiple of rtextents we need to
2261 * remove blocks until it is.
2262 */
2263 if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
2264 /*
2265 * We're not covering the original request, or
2266 * we won't be able to once we fix the length.
2267 */
2268 if (orig_off < ap->off ||
2269 orig_end > ap->off + ap->alen ||
2270 ap->alen - temp < orig_alen)
2271 return XFS_ERROR(EINVAL);
2272 /*
2273 * Try to fix it by moving the start up.
2274 */
2275 if (ap->off + temp <= orig_off) {
2276 ap->alen -= temp;
2277 ap->off += temp;
2278 }
2279 /*
2280 * Try to fix it by moving the end in.
2281 */
2282 else if (ap->off + ap->alen - temp >= orig_end)
2283 ap->alen -= temp;
2284 /*
2285 * Set the start to the minimum then trim the length.
2286 */
2287 else {
2288 ap->alen -= orig_off - ap->off;
2289 ap->off = orig_off;
2290 ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
2291 }
2292 /*
2293 * Result doesn't cover the request, fail it.
2294 */
2295 if (orig_off < ap->off || orig_end > ap->off + ap->alen)
2296 return XFS_ERROR(EINVAL);
2297 }
2298 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0); 2360 ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
2361
2299 /* 2362 /*
2300 * If the offset & length are not perfectly aligned 2363 * If the offset & length are not perfectly aligned
2301 * then kill prod, it will just get us in trouble. 2364 * then kill prod, it will just get us in trouble.
2302 */ 2365 */
2303 if (do_mod(ap->off, extsz) || ap->alen % extsz) 2366 if (do_mod(ap->off, align) || ap->alen % align)
2304 prod = 1; 2367 prod = 1;
2305 /* 2368 /*
2306 * Set ralen to be the actual requested length in rtextents. 2369 * Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
2326 ap->rval = rtx * mp->m_sb.sb_rextsize; 2389 ap->rval = rtx * mp->m_sb.sb_rextsize;
2327 } else 2390 } else
2328 ap->rval = 0; 2391 ap->rval = 0;
2392 } else {
2393 align = (ap->userdata && ap->ip->i_d.di_extsize &&
2394 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
2395 ap->ip->i_d.di_extsize : 0;
2396 if (unlikely(align)) {
2397 error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
2398 align, rt,
2399 ap->eof, 0, ap->conv,
2400 &ap->off, &ap->alen);
2401 ASSERT(!error);
2402 ASSERT(ap->alen);
2403 }
2404 if (nullfb)
2405 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2406 else
2407 ap->rval = ap->firstblock;
2329 } 2408 }
2330#else 2409
2331 if (rt)
2332 ap->rval = 0;
2333#endif /* __KERNEL__ */
2334 else if (nullfb)
2335 ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2336 else
2337 ap->rval = ap->firstblock;
2338 /* 2410 /*
2339 * If allocating at eof, and there's a previous real block, 2411 * If allocating at eof, and there's a previous real block,
2340 * try to use it's last block as our starting point. 2412 * try to use it's last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
2598 args.total = ap->total; 2670 args.total = ap->total;
2599 args.minlen = ap->minlen; 2671 args.minlen = ap->minlen;
2600 } 2672 }
2601 if (ap->ip->i_d.di_extsize) { 2673 if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
2674 (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
2602 args.prod = ap->ip->i_d.di_extsize; 2675 args.prod = ap->ip->i_d.di_extsize;
2603 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))) 2676 if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
2604 args.mod = (xfs_extlen_t)(args.prod - args.mod); 2677 args.mod = (xfs_extlen_t)(args.prod - args.mod);
2605 } else if (mp->m_sb.sb_blocksize >= NBPP) { 2678 } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
2606 args.prod = 1; 2679 args.prod = 1;
2607 args.mod = 0; 2680 args.mod = 0;
2608 } else { 2681 } else {
@@ -3580,14 +3653,16 @@ xfs_bmap_search_extents(
3580 3653
3581 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp, 3654 ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
3582 lastxp, gotp, prevp); 3655 lastxp, gotp, prevp);
3583 rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME; 3656 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
3584 if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) { 3657 if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
3585 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld " 3658 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
3586 "start_block : %llx start_off : %llx blkcnt : %llx " 3659 "start_block : %llx start_off : %llx blkcnt : %llx "
3587 "extent-state : %x \n", 3660 "extent-state : %x \n",
3588 (ip->i_mount)->m_fsname,(long long)ip->i_ino, 3661 (ip->i_mount)->m_fsname, (long long)ip->i_ino,
3589 gotp->br_startblock, gotp->br_startoff, 3662 (unsigned long long)gotp->br_startblock,
3590 gotp->br_blockcount,gotp->br_state); 3663 (unsigned long long)gotp->br_startoff,
3664 (unsigned long long)gotp->br_blockcount,
3665 gotp->br_state);
3591 } 3666 }
3592 return ep; 3667 return ep;
3593} 3668}
@@ -3875,7 +3950,7 @@ xfs_bmap_add_attrfork(
3875 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size); 3950 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
3876 if (!ip->i_d.di_forkoff) 3951 if (!ip->i_d.di_forkoff)
3877 ip->i_d.di_forkoff = mp->m_attroffset >> 3; 3952 ip->i_d.di_forkoff = mp->m_attroffset >> 3;
3878 else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) 3953 else if (mp->m_flags & XFS_MOUNT_ATTR2)
3879 version = 2; 3954 version = 2;
3880 break; 3955 break;
3881 default: 3956 default:
@@ -4023,13 +4098,13 @@ xfs_bmap_compute_maxlevels(
4023 */ 4098 */
4024 if (whichfork == XFS_DATA_FORK) { 4099 if (whichfork == XFS_DATA_FORK) {
4025 maxleafents = MAXEXTNUM; 4100 maxleafents = MAXEXTNUM;
4026 sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ? 4101 sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
4027 mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS); 4102 XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
4028 } else { 4103 } else {
4029 maxleafents = MAXAEXTNUM; 4104 maxleafents = MAXAEXTNUM;
4030 sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ? 4105 sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
4031 mp->m_sb.sb_inodesize - mp->m_attroffset : 4106 XFS_BMDR_SPACE_CALC(MINABTPTRS) :
4032 XFS_BMDR_SPACE_CALC(MINABTPTRS); 4107 mp->m_sb.sb_inodesize - mp->m_attroffset;
4033 } 4108 }
4034 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); 4109 maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
4035 minleafrecs = mp->m_bmap_dmnr[0]; 4110 minleafrecs = mp->m_bmap_dmnr[0];
@@ -4418,8 +4493,8 @@ xfs_bmap_read_extents(
4418 num_recs = be16_to_cpu(block->bb_numrecs); 4493 num_recs = be16_to_cpu(block->bb_numrecs);
4419 if (unlikely(i + num_recs > room)) { 4494 if (unlikely(i + num_recs > room)) {
4420 ASSERT(i + num_recs <= room); 4495 ASSERT(i + num_recs <= room);
4421 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 4496 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
4422 "corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.", 4497 "corrupt dinode %Lu, (btree extents).",
4423 (unsigned long long) ip->i_ino); 4498 (unsigned long long) ip->i_ino);
4424 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", 4499 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
4425 XFS_ERRLEVEL_LOW, 4500 XFS_ERRLEVEL_LOW,
@@ -4590,6 +4665,7 @@ xfs_bmapi(
4590 char contig; /* allocation must be one extent */ 4665 char contig; /* allocation must be one extent */
4591 char delay; /* this request is for delayed alloc */ 4666 char delay; /* this request is for delayed alloc */
4592 char exact; /* don't do all of wasdelayed extent */ 4667 char exact; /* don't do all of wasdelayed extent */
4668 char convert; /* unwritten extent I/O completion */
4593 xfs_bmbt_rec_t *ep; /* extent list entry pointer */ 4669 xfs_bmbt_rec_t *ep; /* extent list entry pointer */
4594 int error; /* error return */ 4670 int error; /* error return */
4595 xfs_bmbt_irec_t got; /* current extent list record */ 4671 xfs_bmbt_irec_t got; /* current extent list record */
@@ -4643,7 +4719,7 @@ xfs_bmapi(
4643 } 4719 }
4644 if (XFS_FORCED_SHUTDOWN(mp)) 4720 if (XFS_FORCED_SHUTDOWN(mp))
4645 return XFS_ERROR(EIO); 4721 return XFS_ERROR(EIO);
4646 rt = XFS_IS_REALTIME_INODE(ip); 4722 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4647 ifp = XFS_IFORK_PTR(ip, whichfork); 4723 ifp = XFS_IFORK_PTR(ip, whichfork);
4648 ASSERT(ifp->if_ext_max == 4724 ASSERT(ifp->if_ext_max ==
4649 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); 4725 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4730,7 @@ xfs_bmapi(
4654 delay = (flags & XFS_BMAPI_DELAY) != 0; 4730 delay = (flags & XFS_BMAPI_DELAY) != 0;
4655 trim = (flags & XFS_BMAPI_ENTIRE) == 0; 4731 trim = (flags & XFS_BMAPI_ENTIRE) == 0;
4656 userdata = (flags & XFS_BMAPI_METADATA) == 0; 4732 userdata = (flags & XFS_BMAPI_METADATA) == 0;
4733 convert = (flags & XFS_BMAPI_CONVERT) != 0;
4657 exact = (flags & XFS_BMAPI_EXACT) != 0; 4734 exact = (flags & XFS_BMAPI_EXACT) != 0;
4658 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; 4735 rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
4659 contig = (flags & XFS_BMAPI_CONTIG) != 0; 4736 contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,15 +4825,25 @@ xfs_bmapi(
4748 } 4825 }
4749 minlen = contig ? alen : 1; 4826 minlen = contig ? alen : 1;
4750 if (delay) { 4827 if (delay) {
4751 xfs_extlen_t extsz = 0; 4828 xfs_extlen_t extsz;
4752 4829
4753 /* Figure out the extent size, adjust alen */ 4830 /* Figure out the extent size, adjust alen */
4754 if (rt) { 4831 if (rt) {
4755 if (!(extsz = ip->i_d.di_extsize)) 4832 if (!(extsz = ip->i_d.di_extsize))
4756 extsz = mp->m_sb.sb_rextsize; 4833 extsz = mp->m_sb.sb_rextsize;
4757 alen = roundup(alen, extsz); 4834 } else {
4758 extsz = alen / mp->m_sb.sb_rextsize; 4835 extsz = ip->i_d.di_extsize;
4759 } 4836 }
4837 if (extsz) {
4838 error = xfs_bmap_extsize_align(mp,
4839 &got, &prev, extsz,
4840 rt, eof, delay, convert,
4841 &aoff, &alen);
4842 ASSERT(!error);
4843 }
4844
4845 if (rt)
4846 extsz = alen / mp->m_sb.sb_rextsize;
4760 4847
4761 /* 4848 /*
4762 * Make a transaction-less quota reservation for 4849 * Make a transaction-less quota reservation for
@@ -4785,32 +4872,33 @@ xfs_bmapi(
4785 xfs_bmap_worst_indlen(ip, alen); 4872 xfs_bmap_worst_indlen(ip, alen);
4786 ASSERT(indlen > 0); 4873 ASSERT(indlen > 0);
4787 4874
4788 if (rt) 4875 if (rt) {
4789 error = xfs_mod_incore_sb(mp, 4876 error = xfs_mod_incore_sb(mp,
4790 XFS_SBS_FREXTENTS, 4877 XFS_SBS_FREXTENTS,
4791 -(extsz), rsvd); 4878 -(extsz), rsvd);
4792 else 4879 } else {
4793 error = xfs_mod_incore_sb(mp, 4880 error = xfs_mod_incore_sb(mp,
4794 XFS_SBS_FDBLOCKS, 4881 XFS_SBS_FDBLOCKS,
4795 -(alen), rsvd); 4882 -(alen), rsvd);
4883 }
4796 if (!error) { 4884 if (!error) {
4797 error = xfs_mod_incore_sb(mp, 4885 error = xfs_mod_incore_sb(mp,
4798 XFS_SBS_FDBLOCKS, 4886 XFS_SBS_FDBLOCKS,
4799 -(indlen), rsvd); 4887 -(indlen), rsvd);
4800 if (error && rt) { 4888 if (error && rt)
4801 xfs_mod_incore_sb(ip->i_mount, 4889 xfs_mod_incore_sb(mp,
4802 XFS_SBS_FREXTENTS, 4890 XFS_SBS_FREXTENTS,
4803 extsz, rsvd); 4891 extsz, rsvd);
4804 } else if (error) { 4892 else if (error)
4805 xfs_mod_incore_sb(ip->i_mount, 4893 xfs_mod_incore_sb(mp,
4806 XFS_SBS_FDBLOCKS, 4894 XFS_SBS_FDBLOCKS,
4807 alen, rsvd); 4895 alen, rsvd);
4808 }
4809 } 4896 }
4810 4897
4811 if (error) { 4898 if (error) {
4812 if (XFS_IS_QUOTA_ON(ip->i_mount)) 4899 if (XFS_IS_QUOTA_ON(mp))
4813 /* unreserve the blocks now */ 4900 /* unreserve the blocks now */
4901 (void)
4814 XFS_TRANS_UNRESERVE_QUOTA_NBLKS( 4902 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
4815 mp, NULL, ip, 4903 mp, NULL, ip,
4816 (long)alen, 0, rt ? 4904 (long)alen, 0, rt ?
@@ -4849,6 +4937,7 @@ xfs_bmapi(
4849 bma.firstblock = *firstblock; 4937 bma.firstblock = *firstblock;
4850 bma.alen = alen; 4938 bma.alen = alen;
4851 bma.off = aoff; 4939 bma.off = aoff;
4940 bma.conv = convert;
4852 bma.wasdel = wasdelay; 4941 bma.wasdel = wasdelay;
4853 bma.minlen = minlen; 4942 bma.minlen = minlen;
4854 bma.low = flist->xbf_low; 4943 bma.low = flist->xbf_low;
@@ -5270,8 +5359,7 @@ xfs_bunmapi(
5270 return 0; 5359 return 0;
5271 } 5360 }
5272 XFS_STATS_INC(xs_blk_unmap); 5361 XFS_STATS_INC(xs_blk_unmap);
5273 isrt = (whichfork == XFS_DATA_FORK) && 5362 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5274 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
5275 start = bno; 5363 start = bno;
5276 bno = start + len - 1; 5364 bno = start + len - 1;
5277 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, 5365 ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5531,7 @@ xfs_bunmapi(
5443 } 5531 }
5444 if (wasdel) { 5532 if (wasdel) {
5445 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0); 5533 ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
5446 /* Update realtim/data freespace, unreserve quota */ 5534 /* Update realtime/data freespace, unreserve quota */
5447 if (isrt) { 5535 if (isrt) {
5448 xfs_filblks_t rtexts; 5536 xfs_filblks_t rtexts;
5449 5537
@@ -5451,14 +5539,14 @@ xfs_bunmapi(
5451 do_div(rtexts, mp->m_sb.sb_rextsize); 5539 do_div(rtexts, mp->m_sb.sb_rextsize);
5452 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5540 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
5453 (int)rtexts, rsvd); 5541 (int)rtexts, rsvd);
5454 XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, 5542 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
5455 -((long)del.br_blockcount), 0, 5543 NULL, ip, -((long)del.br_blockcount), 0,
5456 XFS_QMOPT_RES_RTBLKS); 5544 XFS_QMOPT_RES_RTBLKS);
5457 } else { 5545 } else {
5458 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, 5546 xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
5459 (int)del.br_blockcount, rsvd); 5547 (int)del.br_blockcount, rsvd);
5460 XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip, 5548 (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
5461 -((long)del.br_blockcount), 0, 5549 NULL, ip, -((long)del.br_blockcount), 0,
5462 XFS_QMOPT_RES_REGBLKS); 5550 XFS_QMOPT_RES_REGBLKS);
5463 } 5551 }
5464 ip->i_delayed_blks -= del.br_blockcount; 5552 ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5740,9 @@ xfs_getbmap(
5652 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5740 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5653 return XFS_ERROR(EINVAL); 5741 return XFS_ERROR(EINVAL);
5654 if (whichfork == XFS_DATA_FORK) { 5742 if (whichfork == XFS_DATA_FORK) {
5655 if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) { 5743 if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
5744 (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
5745 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
5656 prealloced = 1; 5746 prealloced = 1;
5657 fixlen = XFS_MAXIOFFSET(mp); 5747 fixlen = XFS_MAXIOFFSET(mp);
5658 } else { 5748 } else {
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 2e0717a01309..12cc63dfc2c4 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -62,6 +62,10 @@ typedef struct xfs_bmap_free
62#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */ 62#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
63 /* combine contig. space */ 63 /* combine contig. space */
64#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */ 64#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */
65/* XFS_BMAPI_DIRECT_IO 0x800 */
66#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */
67 /* need write cache flushing and no */
68 /* additional allocation alignments */
65 69
66#define XFS_BMAPI_AFLAG(w) xfs_bmapi_aflag(w) 70#define XFS_BMAPI_AFLAG(w) xfs_bmapi_aflag(w)
67static inline int xfs_bmapi_aflag(int w) 71static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
101 char wasdel; /* replacing a delayed allocation */ 105 char wasdel; /* replacing a delayed allocation */
102 char userdata;/* set if is user data */ 106 char userdata;/* set if is user data */
103 char low; /* low on space, using seq'l ags */ 107 char low; /* low on space, using seq'l ags */
104 char aeof; /* allocated space at eof */ 108 char aeof; /* allocated space at eof */
109 char conv; /* overwriting unwritten extents */
105} xfs_bmalloca_t; 110} xfs_bmalloca_t;
106 111
107#ifdef __KERNEL__ 112#ifdef __KERNEL__
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 328a528b926d..f57cc9ac875e 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -57,7 +57,7 @@ struct xfs_mount_args {
57/* 57/*
58 * XFS mount option flags -- args->flags1 58 * XFS mount option flags -- args->flags1
59 */ 59 */
60#define XFSMNT_COMPAT_ATTR 0x00000001 /* do not use ATTR2 format */ 60#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */
61#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount 61#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount
62 * compatible */ 62 * compatible */
63#define XFSMNT_INO64 0x00000004 /* move inode numbers up 63#define XFSMNT_INO64 0x00000004 /* move inode numbers up
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 070259a4254c..c6191d00ad27 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -60,8 +60,6 @@ xfs_swapext(
60 xfs_bstat_t *sbp; 60 xfs_bstat_t *sbp;
61 struct file *fp = NULL, *tfp = NULL; 61 struct file *fp = NULL, *tfp = NULL;
62 vnode_t *vp, *tvp; 62 vnode_t *vp, *tvp;
63 bhv_desc_t *bdp, *tbdp;
64 vn_bhv_head_t *bhp, *tbhp;
65 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; 63 static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
66 int ilf_fields, tilf_fields; 64 int ilf_fields, tilf_fields;
67 int error = 0; 65 int error = 0;
@@ -90,13 +88,10 @@ xfs_swapext(
90 goto error0; 88 goto error0;
91 } 89 }
92 90
93 bhp = VN_BHV_HEAD(vp); 91 ip = xfs_vtoi(vp);
94 bdp = vn_bhv_lookup(bhp, &xfs_vnodeops); 92 if (ip == NULL) {
95 if (bdp == NULL) {
96 error = XFS_ERROR(EBADF); 93 error = XFS_ERROR(EBADF);
97 goto error0; 94 goto error0;
98 } else {
99 ip = XFS_BHVTOI(bdp);
100 } 95 }
101 96
102 if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) || 97 if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
@@ -105,13 +100,10 @@ xfs_swapext(
105 goto error0; 100 goto error0;
106 } 101 }
107 102
108 tbhp = VN_BHV_HEAD(tvp); 103 tip = xfs_vtoi(tvp);
109 tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops); 104 if (tip == NULL) {
110 if (tbdp == NULL) {
111 error = XFS_ERROR(EBADF); 105 error = XFS_ERROR(EBADF);
112 goto error0; 106 goto error0;
113 } else {
114 tip = XFS_BHVTOI(tbdp);
115 } 107 }
116 108
117 if (ip->i_mount != tip->i_mount) { 109 if (ip->i_mount != tip->i_mount) {
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c5a0e537ff1a..79d0d9e1fbab 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -199,10 +199,16 @@ typedef enum xfs_dinode_fmt
199 199
200#define XFS_DFORK_DSIZE(dip,mp) \ 200#define XFS_DFORK_DSIZE(dip,mp) \
201 XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp) 201 XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
202#define XFS_DFORK_DSIZE_HOST(dip,mp) \
203 XFS_CFORK_DSIZE(&(dip)->di_core, mp)
202#define XFS_DFORK_ASIZE(dip,mp) \ 204#define XFS_DFORK_ASIZE(dip,mp) \
203 XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp) 205 XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
206#define XFS_DFORK_ASIZE_HOST(dip,mp) \
207 XFS_CFORK_ASIZE(&(dip)->di_core, mp)
204#define XFS_DFORK_SIZE(dip,mp,w) \ 208#define XFS_DFORK_SIZE(dip,mp,w) \
205 XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w) 209 XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
210#define XFS_DFORK_SIZE_HOST(dip,mp,w) \
211 XFS_CFORK_SIZE(&(dip)->di_core, mp, w)
206 212
207#define XFS_DFORK_Q(dip) XFS_CFORK_Q_DISK(&(dip)->di_core) 213#define XFS_DFORK_Q(dip) XFS_CFORK_Q_DISK(&(dip)->di_core)
208#define XFS_DFORK_BOFF(dip) XFS_CFORK_BOFF_DISK(&(dip)->di_core) 214#define XFS_DFORK_BOFF(dip) XFS_CFORK_BOFF_DISK(&(dip)->di_core)
@@ -216,6 +222,7 @@ typedef enum xfs_dinode_fmt
216#define XFS_CFORK_FMT_SET(dcp,w,n) \ 222#define XFS_CFORK_FMT_SET(dcp,w,n) \
217 ((w) == XFS_DATA_FORK ? \ 223 ((w) == XFS_DATA_FORK ? \
218 ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n))) 224 ((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
225#define XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w)
219 226
220#define XFS_CFORK_NEXTENTS_DISK(dcp,w) \ 227#define XFS_CFORK_NEXTENTS_DISK(dcp,w) \
221 ((w) == XFS_DATA_FORK ? \ 228 ((w) == XFS_DATA_FORK ? \
@@ -223,13 +230,13 @@ typedef enum xfs_dinode_fmt
223 INT_GET((dcp)->di_anextents, ARCH_CONVERT)) 230 INT_GET((dcp)->di_anextents, ARCH_CONVERT))
224#define XFS_CFORK_NEXTENTS(dcp,w) \ 231#define XFS_CFORK_NEXTENTS(dcp,w) \
225 ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents) 232 ((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
233#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
234#define XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w)
226 235
227#define XFS_CFORK_NEXT_SET(dcp,w,n) \ 236#define XFS_CFORK_NEXT_SET(dcp,w,n) \
228 ((w) == XFS_DATA_FORK ? \ 237 ((w) == XFS_DATA_FORK ? \
229 ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n))) 238 ((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
230 239
231#define XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
232
233#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) 240#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp))
234 241
235/* 242/*
@@ -246,8 +253,10 @@ typedef enum xfs_dinode_fmt
246#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */ 253#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */
247#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */ 254#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */
248#define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */ 255#define XFS_DIFLAG_RTINHERIT_BIT 8 /* create with realtime bit set */
249#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */ 256#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
250#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */ 257#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
258#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
259#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
251#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) 260#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
252#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) 261#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
253#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) 262#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +268,14 @@ typedef enum xfs_dinode_fmt
259#define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT) 268#define XFS_DIFLAG_RTINHERIT (1 << XFS_DIFLAG_RTINHERIT_BIT)
260#define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT) 269#define XFS_DIFLAG_PROJINHERIT (1 << XFS_DIFLAG_PROJINHERIT_BIT)
261#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT) 270#define XFS_DIFLAG_NOSYMLINKS (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
271#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
272#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
262 273
263#define XFS_DIFLAG_ANY \ 274#define XFS_DIFLAG_ANY \
264 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \ 275 (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
265 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \ 276 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
266 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \ 277 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
267 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS) 278 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
279 XFS_DIFLAG_EXTSZINHERIT)
268 280
269#endif /* __XFS_DINODE_H__ */ 281#endif /* __XFS_DINODE_H__ */
diff --git a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c
index 3dd30391f551..bb87d2a700a9 100644
--- a/fs/xfs/xfs_dir.c
+++ b/fs/xfs/xfs_dir.c
@@ -176,7 +176,7 @@ xfs_dir_mount(xfs_mount_t *mp)
176 uint shortcount, leafcount, count; 176 uint shortcount, leafcount, count;
177 177
178 mp->m_dirversion = 1; 178 mp->m_dirversion = 1;
179 if (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) { 179 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
180 shortcount = (mp->m_attroffset - 180 shortcount = (mp->m_attroffset -
181 (uint)sizeof(xfs_dir_sf_hdr_t)) / 181 (uint)sizeof(xfs_dir_sf_hdr_t)) /
182 (uint)sizeof(xfs_dir_sf_entry_t); 182 (uint)sizeof(xfs_dir_sf_entry_t);
diff --git a/fs/xfs/xfs_dir.h b/fs/xfs/xfs_dir.h
index 488defe86ba6..8cc8afb9f6c0 100644
--- a/fs/xfs/xfs_dir.h
+++ b/fs/xfs/xfs_dir.h
@@ -135,6 +135,8 @@ void xfs_dir_startup(void); /* called exactly once */
135 ((mp)->m_dirops.xd_shortform_to_single(args)) 135 ((mp)->m_dirops.xd_shortform_to_single(args))
136 136
137#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1) 137#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1)
138#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2)
138extern xfs_dirops_t xfsv1_dirops; 139extern xfs_dirops_t xfsv1_dirops;
140extern xfs_dirops_t xfsv2_dirops;
139 141
140#endif /* __XFS_DIR_H__ */ 142#endif /* __XFS_DIR_H__ */
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 7e24ffeda9e1..3158f5dc431f 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -72,9 +72,6 @@ typedef struct xfs_dir2_put_args {
72 struct uio *uio; /* uio control structure */ 72 struct uio *uio; /* uio control structure */
73} xfs_dir2_put_args_t; 73} xfs_dir2_put_args_t;
74 74
75#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2)
76extern xfs_dirops_t xfsv2_dirops;
77
78/* 75/*
79 * Other interfaces used by the rest of the dir v2 code. 76 * Other interfaces used by the rest of the dir v2 code.
80 */ 77 */
diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c
index 950df31efc46..e83074016abb 100644
--- a/fs/xfs/xfs_dir_leaf.c
+++ b/fs/xfs/xfs_dir_leaf.c
@@ -147,7 +147,7 @@ xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
147 hdr->count = 0; 147 hdr->count = 0;
148 dp->i_d.di_size = sizeof(*hdr); 148 dp->i_d.di_size = sizeof(*hdr);
149 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 149 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
150 return(0); 150 return 0;
151} 151}
152 152
153/* 153/*
@@ -180,7 +180,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
180 if (sfe->namelen == args->namelen && 180 if (sfe->namelen == args->namelen &&
181 args->name[0] == sfe->name[0] && 181 args->name[0] == sfe->name[0] &&
182 memcmp(args->name, sfe->name, args->namelen) == 0) 182 memcmp(args->name, sfe->name, args->namelen) == 0)
183 return(XFS_ERROR(EEXIST)); 183 return XFS_ERROR(EEXIST);
184 sfe = XFS_DIR_SF_NEXTENTRY(sfe); 184 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
185 } 185 }
186 186
@@ -198,7 +198,7 @@ xfs_dir_shortform_addname(xfs_da_args_t *args)
198 dp->i_d.di_size += size; 198 dp->i_d.di_size += size;
199 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 199 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
200 200
201 return(0); 201 return 0;
202} 202}
203 203
204/* 204/*
@@ -238,7 +238,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
238 } 238 }
239 if (i < 0) { 239 if (i < 0) {
240 ASSERT(args->oknoent); 240 ASSERT(args->oknoent);
241 return(XFS_ERROR(ENOENT)); 241 return XFS_ERROR(ENOENT);
242 } 242 }
243 243
244 if ((base + size) != dp->i_d.di_size) { 244 if ((base + size) != dp->i_d.di_size) {
@@ -251,7 +251,7 @@ xfs_dir_shortform_removename(xfs_da_args_t *args)
251 dp->i_d.di_size -= size; 251 dp->i_d.di_size -= size;
252 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 252 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
253 253
254 return(0); 254 return 0;
255} 255}
256 256
257/* 257/*
@@ -390,7 +390,7 @@ xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
390 390
391out: 391out:
392 kmem_free(tmpbuffer, size); 392 kmem_free(tmpbuffer, size);
393 return(retval); 393 return retval;
394} 394}
395 395
396STATIC int 396STATIC int
@@ -596,7 +596,7 @@ xfs_dir_shortform_replace(xfs_da_args_t *args)
596 /* XXX - replace assert? */ 596 /* XXX - replace assert? */
597 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent); 597 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent);
598 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); 598 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
599 return(0); 599 return 0;
600 } 600 }
601 ASSERT(args->namelen != 1 || args->name[0] != '.'); 601 ASSERT(args->namelen != 1 || args->name[0] != '.');
602 sfe = &sf->list[0]; 602 sfe = &sf->list[0];
@@ -608,12 +608,12 @@ xfs_dir_shortform_replace(xfs_da_args_t *args)
608 (char *)&sfe->inumber, sizeof(xfs_ino_t))); 608 (char *)&sfe->inumber, sizeof(xfs_ino_t)));
609 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber); 609 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
610 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); 610 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
611 return(0); 611 return 0;
612 } 612 }
613 sfe = XFS_DIR_SF_NEXTENTRY(sfe); 613 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
614 } 614 }
615 ASSERT(args->oknoent); 615 ASSERT(args->oknoent);
616 return(XFS_ERROR(ENOENT)); 616 return XFS_ERROR(ENOENT);
617} 617}
618 618
619/* 619/*
@@ -695,7 +695,7 @@ xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
695 695
696out: 696out:
697 kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount)); 697 kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
698 return(retval); 698 return retval;
699} 699}
700 700
701/* 701/*
@@ -715,17 +715,17 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
715 retval = xfs_da_grow_inode(args, &blkno); 715 retval = xfs_da_grow_inode(args, &blkno);
716 ASSERT(blkno == 1); 716 ASSERT(blkno == 1);
717 if (retval) 717 if (retval)
718 return(retval); 718 return retval;
719 retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1, 719 retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
720 XFS_DATA_FORK); 720 XFS_DATA_FORK);
721 if (retval) 721 if (retval)
722 return(retval); 722 return retval;
723 ASSERT(bp1 != NULL); 723 ASSERT(bp1 != NULL);
724 retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2, 724 retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
725 XFS_DATA_FORK); 725 XFS_DATA_FORK);
726 if (retval) { 726 if (retval) {
727 xfs_da_buf_done(bp1); 727 xfs_da_buf_done(bp1);
728 return(retval); 728 return retval;
729 } 729 }
730 ASSERT(bp2 != NULL); 730 ASSERT(bp2 != NULL);
731 memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount)); 731 memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
@@ -738,7 +738,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
738 retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK); 738 retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
739 if (retval) { 739 if (retval) {
740 xfs_da_buf_done(bp2); 740 xfs_da_buf_done(bp2);
741 return(retval); 741 return retval;
742 } 742 }
743 node = bp1->data; 743 node = bp1->data;
744 leaf = bp2->data; 744 leaf = bp2->data;
@@ -751,7 +751,7 @@ xfs_dir_leaf_to_node(xfs_da_args_t *args)
751 XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0]))); 751 XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
752 xfs_da_buf_done(bp1); 752 xfs_da_buf_done(bp1);
753 753
754 return(retval); 754 return retval;
755} 755}
756 756
757 757
@@ -776,7 +776,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
776 ASSERT(dp != NULL); 776 ASSERT(dp != NULL);
777 retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK); 777 retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
778 if (retval) 778 if (retval)
779 return(retval); 779 return retval;
780 ASSERT(bp != NULL); 780 ASSERT(bp != NULL);
781 leaf = bp->data; 781 leaf = bp->data;
782 memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); 782 memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
@@ -791,7 +791,7 @@ xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
791 xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); 791 xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
792 792
793 *bpp = bp; 793 *bpp = bp;
794 return(0); 794 return 0;
795} 795}
796 796
797/* 797/*
@@ -813,10 +813,10 @@ xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
813 ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC); 813 ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
814 error = xfs_da_grow_inode(args, &blkno); 814 error = xfs_da_grow_inode(args, &blkno);
815 if (error) 815 if (error)
816 return(error); 816 return error;
817 error = xfs_dir_leaf_create(args, blkno, &newblk->bp); 817 error = xfs_dir_leaf_create(args, blkno, &newblk->bp);
818 if (error) 818 if (error)
819 return(error); 819 return error;
820 newblk->blkno = blkno; 820 newblk->blkno = blkno;
821 newblk->magic = XFS_DIR_LEAF_MAGIC; 821 newblk->magic = XFS_DIR_LEAF_MAGIC;
822 822
@@ -826,7 +826,7 @@ xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
826 xfs_dir_leaf_rebalance(state, oldblk, newblk); 826 xfs_dir_leaf_rebalance(state, oldblk, newblk);
827 error = xfs_da_blk_link(state, oldblk, newblk); 827 error = xfs_da_blk_link(state, oldblk, newblk);
828 if (error) 828 if (error)
829 return(error); 829 return error;
830 830
831 /* 831 /*
832 * Insert the new entry in the correct block. 832 * Insert the new entry in the correct block.
@@ -842,7 +842,7 @@ xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
842 */ 842 */
843 oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL); 843 oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
844 newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL); 844 newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
845 return(error); 845 return error;
846} 846}
847 847
848/* 848/*
@@ -885,7 +885,7 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
885 if (INT_GET(map->size, ARCH_CONVERT) >= tmp) { 885 if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
886 if (!args->justcheck) 886 if (!args->justcheck)
887 xfs_dir_leaf_add_work(bp, args, index, i); 887 xfs_dir_leaf_add_work(bp, args, index, i);
888 return(0); 888 return 0;
889 } 889 }
890 sum += INT_GET(map->size, ARCH_CONVERT); 890 sum += INT_GET(map->size, ARCH_CONVERT);
891 } 891 }
@@ -896,7 +896,7 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
896 * no good and we should just give up. 896 * no good and we should just give up.
897 */ 897 */
898 if (!hdr->holes && (sum < entsize)) 898 if (!hdr->holes && (sum < entsize))
899 return(XFS_ERROR(ENOSPC)); 899 return XFS_ERROR(ENOSPC);
900 900
901 /* 901 /*
902 * Compact the entries to coalesce free space. 902 * Compact the entries to coalesce free space.
@@ -909,18 +909,18 @@ xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
909 (uint)sizeof(xfs_dir_leaf_entry_t) : 0, 909 (uint)sizeof(xfs_dir_leaf_entry_t) : 0,
910 args->justcheck); 910 args->justcheck);
911 if (error) 911 if (error)
912 return(error); 912 return error;
913 /* 913 /*
914 * After compaction, the block is guaranteed to have only one 914 * After compaction, the block is guaranteed to have only one
915 * free region, in freemap[0]. If it is not big enough, give up. 915 * free region, in freemap[0]. If it is not big enough, give up.
916 */ 916 */
917 if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) < 917 if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
918 (entsize + (uint)sizeof(xfs_dir_leaf_entry_t))) 918 (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
919 return(XFS_ERROR(ENOSPC)); 919 return XFS_ERROR(ENOSPC);
920 920
921 if (!args->justcheck) 921 if (!args->justcheck)
922 xfs_dir_leaf_add_work(bp, args, index, 0); 922 xfs_dir_leaf_add_work(bp, args, index, 0);
923 return(0); 923 return 0;
924} 924}
925 925
926/* 926/*
@@ -1072,7 +1072,7 @@ xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
1072 kmem_free(tmpbuffer, lbsize); 1072 kmem_free(tmpbuffer, lbsize);
1073 if (musthave || justcheck) 1073 if (musthave || justcheck)
1074 kmem_free(tmpbuffer2, lbsize); 1074 kmem_free(tmpbuffer2, lbsize);
1075 return(rval); 1075 return rval;
1076} 1076}
1077 1077
1078/* 1078/*
@@ -1292,7 +1292,7 @@ xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
1292 1292
1293 *countarg = count; 1293 *countarg = count;
1294 *namebytesarg = totallen; 1294 *namebytesarg = totallen;
1295 return(foundit); 1295 return foundit;
1296} 1296}
1297 1297
1298/*======================================================================== 1298/*========================================================================
@@ -1334,7 +1334,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1334 INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); 1334 INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1335 if (bytes > (state->blocksize >> 1)) { 1335 if (bytes > (state->blocksize >> 1)) {
1336 *action = 0; /* blk over 50%, don't try to join */ 1336 *action = 0; /* blk over 50%, don't try to join */
1337 return(0); 1337 return 0;
1338 } 1338 }
1339 1339
1340 /* 1340 /*
@@ -1353,13 +1353,13 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1353 error = xfs_da_path_shift(state, &state->altpath, forward, 1353 error = xfs_da_path_shift(state, &state->altpath, forward,
1354 0, &retval); 1354 0, &retval);
1355 if (error) 1355 if (error)
1356 return(error); 1356 return error;
1357 if (retval) { 1357 if (retval) {
1358 *action = 0; 1358 *action = 0;
1359 } else { 1359 } else {
1360 *action = 2; 1360 *action = 2;
1361 } 1361 }
1362 return(0); 1362 return 0;
1363 } 1363 }
1364 1364
1365 /* 1365 /*
@@ -1381,7 +1381,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1381 blkno, -1, &bp, 1381 blkno, -1, &bp,
1382 XFS_DATA_FORK); 1382 XFS_DATA_FORK);
1383 if (error) 1383 if (error)
1384 return(error); 1384 return error;
1385 ASSERT(bp != NULL); 1385 ASSERT(bp != NULL);
1386 1386
1387 leaf = (xfs_dir_leafblock_t *)info; 1387 leaf = (xfs_dir_leafblock_t *)info;
@@ -1402,7 +1402,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1402 } 1402 }
1403 if (i >= 2) { 1403 if (i >= 2) {
1404 *action = 0; 1404 *action = 0;
1405 return(0); 1405 return 0;
1406 } 1406 }
1407 xfs_da_buf_done(bp); 1407 xfs_da_buf_done(bp);
1408 1408
@@ -1419,13 +1419,13 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1419 0, &retval); 1419 0, &retval);
1420 } 1420 }
1421 if (error) 1421 if (error)
1422 return(error); 1422 return error;
1423 if (retval) { 1423 if (retval) {
1424 *action = 0; 1424 *action = 0;
1425 } else { 1425 } else {
1426 *action = 1; 1426 *action = 1;
1427 } 1427 }
1428 return(0); 1428 return 0;
1429} 1429}
1430 1430
1431/* 1431/*
@@ -1575,8 +1575,8 @@ xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
1575 tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1); 1575 tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
1576 tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); 1576 tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1577 if (tmp < mp->m_dir_magicpct) 1577 if (tmp < mp->m_dir_magicpct)
1578 return(1); /* leaf is < 37% full */ 1578 return 1; /* leaf is < 37% full */
1579 return(0); 1579 return 0;
1580} 1580}
1581 1581
1582/* 1582/*
@@ -1732,7 +1732,7 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
1732 if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) { 1732 if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
1733 *index = probe; 1733 *index = probe;
1734 ASSERT(args->oknoent); 1734 ASSERT(args->oknoent);
1735 return(XFS_ERROR(ENOENT)); 1735 return XFS_ERROR(ENOENT);
1736 } 1736 }
1737 1737
1738 /* 1738 /*
@@ -1745,14 +1745,14 @@ xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
1745 memcmp(args->name, namest->name, args->namelen) == 0) { 1745 memcmp(args->name, namest->name, args->namelen) == 0) {
1746 XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber); 1746 XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber);
1747 *index = probe; 1747 *index = probe;
1748 return(XFS_ERROR(EEXIST)); 1748 return XFS_ERROR(EEXIST);
1749 } 1749 }
1750 entry++; 1750 entry++;
1751 probe++; 1751 probe++;
1752 } 1752 }
1753 *index = probe; 1753 *index = probe;
1754 ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent); 1754 ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
1755 return(XFS_ERROR(ENOENT)); 1755 return XFS_ERROR(ENOENT);
1756} 1756}
1757 1757
1758/*======================================================================== 1758/*========================================================================
@@ -1890,9 +1890,9 @@ xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
1890 INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) || 1890 INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) ||
1891 (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < 1891 (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1892 INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) { 1892 INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1893 return(1); 1893 return 1;
1894 } 1894 }
1895 return(0); 1895 return 0;
1896} 1896}
1897 1897
1898/* 1898/*
@@ -1942,7 +1942,7 @@ xfs_dir_leaf_getdents_int(
1942 leaf = bp->data; 1942 leaf = bp->data;
1943 if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { 1943 if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
1944 *eobp = 1; 1944 *eobp = 1;
1945 return(XFS_ERROR(ENOENT)); /* XXX wrong code */ 1945 return XFS_ERROR(ENOENT); /* XXX wrong code */
1946 } 1946 }
1947 1947
1948 want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset); 1948 want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
@@ -2000,7 +2000,7 @@ xfs_dir_leaf_getdents_int(
2000 * the node code will be setting uio_offset anyway. 2000 * the node code will be setting uio_offset anyway.
2001 */ 2001 */
2002 *eobp = 0; 2002 *eobp = 0;
2003 return(0); 2003 return 0;
2004 } 2004 }
2005 xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry); 2005 xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry);
2006 2006
@@ -2057,7 +2057,7 @@ xfs_dir_leaf_getdents_int(
2057 retval = xfs_da_read_buf(dp->i_transp, dp, thishash, 2057 retval = xfs_da_read_buf(dp->i_transp, dp, thishash,
2058 nextda, &bp2, XFS_DATA_FORK); 2058 nextda, &bp2, XFS_DATA_FORK);
2059 if (retval) 2059 if (retval)
2060 return(retval); 2060 return retval;
2061 2061
2062 ASSERT(bp2 != NULL); 2062 ASSERT(bp2 != NULL);
2063 2063
@@ -2073,7 +2073,7 @@ xfs_dir_leaf_getdents_int(
2073 leaf2); 2073 leaf2);
2074 xfs_da_brelse(dp->i_transp, bp2); 2074 xfs_da_brelse(dp->i_transp, bp2);
2075 2075
2076 return(XFS_ERROR(EFSCORRUPTED)); 2076 return XFS_ERROR(EFSCORRUPTED);
2077 } 2077 }
2078 2078
2079 nexthash = INT_GET(leaf2->entries[0].hashval, 2079 nexthash = INT_GET(leaf2->entries[0].hashval,
@@ -2139,7 +2139,7 @@ xfs_dir_leaf_getdents_int(
2139 2139
2140 xfs_dir_trace_g_du("leaf: E-O-B", dp, uio); 2140 xfs_dir_trace_g_du("leaf: E-O-B", dp, uio);
2141 2141
2142 return(retval); 2142 return retval;
2143 } 2143 }
2144 } 2144 }
2145 2145
@@ -2149,7 +2149,7 @@ xfs_dir_leaf_getdents_int(
2149 2149
2150 xfs_dir_trace_g_du("leaf: E-O-F", dp, uio); 2150 xfs_dir_trace_g_du("leaf: E-O-F", dp, uio);
2151 2151
2152 return(0); 2152 return 0;
2153} 2153}
2154 2154
2155/* 2155/*
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index ab6b09eef9ab..eb8cd9a4667f 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -67,34 +67,38 @@ struct xfs_trans;
67 */ 67 */
68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */ 68#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */
69 69
70typedef struct xfs_dir_leaf_map { /* RLE map of free bytes */
71 __uint16_t base; /* base of free region */
72 __uint16_t size; /* run length of free region */
73} xfs_dir_leaf_map_t;
74
75typedef struct xfs_dir_leaf_hdr { /* constant-structure header block */
76 xfs_da_blkinfo_t info; /* block type, links, etc. */
77 __uint16_t count; /* count of active leaf_entry's */
78 __uint16_t namebytes; /* num bytes of name strings stored */
79 __uint16_t firstused; /* first used byte in name area */
80 __uint8_t holes; /* != 0 if blk needs compaction */
81 __uint8_t pad1;
82 xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE];
83} xfs_dir_leaf_hdr_t;
84
85typedef struct xfs_dir_leaf_entry { /* sorted on key, not name */
86 xfs_dahash_t hashval; /* hash value of name */
87 __uint16_t nameidx; /* index into buffer of name */
88 __uint8_t namelen; /* length of name string */
89 __uint8_t pad2;
90} xfs_dir_leaf_entry_t;
91
92typedef struct xfs_dir_leaf_name {
93 xfs_dir_ino_t inumber; /* inode number for this key */
94 __uint8_t name[1]; /* name string itself */
95} xfs_dir_leaf_name_t;
96
70typedef struct xfs_dir_leafblock { 97typedef struct xfs_dir_leafblock {
71 struct xfs_dir_leaf_hdr { /* constant-structure header block */ 98 xfs_dir_leaf_hdr_t hdr; /* constant-structure header block */
72 xfs_da_blkinfo_t info; /* block type, links, etc. */ 99 xfs_dir_leaf_entry_t entries[1]; /* var sized array */
73 __uint16_t count; /* count of active leaf_entry's */ 100 xfs_dir_leaf_name_t namelist[1]; /* grows from bottom of buf */
74 __uint16_t namebytes; /* num bytes of name strings stored */
75 __uint16_t firstused; /* first used byte in name area */
76 __uint8_t holes; /* != 0 if blk needs compaction */
77 __uint8_t pad1;
78 struct xfs_dir_leaf_map {/* RLE map of free bytes */
79 __uint16_t base; /* base of free region */
80 __uint16_t size; /* run length of free region */
81 } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
82 } hdr;
83 struct xfs_dir_leaf_entry { /* sorted on key, not name */
84 xfs_dahash_t hashval; /* hash value of name */
85 __uint16_t nameidx; /* index into buffer of name */
86 __uint8_t namelen; /* length of name string */
87 __uint8_t pad2;
88 } entries[1]; /* var sized array */
89 struct xfs_dir_leaf_name {
90 xfs_dir_ino_t inumber; /* inode number for this key */
91 __uint8_t name[1]; /* name string itself */
92 } namelist[1]; /* grows from bottom of buf */
93} xfs_dir_leafblock_t; 101} xfs_dir_leafblock_t;
94typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
95typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
96typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
97typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
98 102
99/* 103/*
100 * Length of name for which a 512-byte block filesystem 104 * Length of name for which a 512-byte block filesystem
@@ -126,11 +130,10 @@ typedef union {
126#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \ 130#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \
127 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash)) 131 ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
128 132
129typedef struct xfs_dir_put_args 133typedef struct xfs_dir_put_args {
130{
131 xfs_dircook_t cook; /* cookie of (next) entry */ 134 xfs_dircook_t cook; /* cookie of (next) entry */
132 xfs_intino_t ino; /* inode number */ 135 xfs_intino_t ino; /* inode number */
133 struct xfs_dirent *dbp; /* buffer pointer */ 136 struct xfs_dirent *dbp; /* buffer pointer */
134 char *name; /* directory entry name */ 137 char *name; /* directory entry name */
135 int namelen; /* length of name */ 138 int namelen; /* length of name */
136 int done; /* output: set if value was stored */ 139 int done; /* output: set if value was stored */
@@ -138,7 +141,8 @@ typedef struct xfs_dir_put_args
138 struct uio *uio; /* uio control structure */ 141 struct uio *uio; /* uio control structure */
139} xfs_dir_put_args_t; 142} xfs_dir_put_args_t;
140 143
141#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) xfs_dir_leaf_entsize_byname(len) 144#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) \
145 xfs_dir_leaf_entsize_byname(len)
142static inline int xfs_dir_leaf_entsize_byname(int len) 146static inline int xfs_dir_leaf_entsize_byname(int len)
143{ 147{
144 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len; 148 return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 864bf6955689..b4c7f2bc55a0 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -152,7 +152,7 @@ typedef enum {
152 152
153#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */ 153#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */
154#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */ 154#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */
155#define DM_FLAGS_ISEM 0x004 /* thread holds i_sem */ 155#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */
156#define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */ 156#define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */
157#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */ 157#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */
158 158
@@ -161,21 +161,21 @@ typedef enum {
161 */ 161 */
162#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) 162#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
163#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ 163#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
164 DM_FLAGS_ISEM : 0) 164 DM_FLAGS_IMUX : 0)
165#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) 165#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
166#endif 166#endif
167 167
168#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ 168#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
169 (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22)) 169 (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
170#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ 170#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
171 DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM) 171 DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX)
172#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) 172#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
173#endif 173#endif
174 174
175#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21) 175#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
176#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ 176#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
177 0 : DM_FLAGS_ISEM) 177 0 : DM_FLAGS_IMUX)
178#define DM_SEM_FLAG_WR (DM_FLAGS_ISEM) 178#define DM_SEM_FLAG_WR (DM_FLAGS_IMUX)
179#endif 179#endif
180 180
181 181
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index d7b6b5d16704..2a21c5024017 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -54,7 +54,6 @@ xfs_error_trap(int e)
54 if (e != xfs_etrap[i]) 54 if (e != xfs_etrap[i])
55 continue; 55 continue;
56 cmn_err(CE_NOTE, "xfs_error_trap: error %d", e); 56 cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
57 debug_stop_all_cpus((void *)-1LL);
58 BUG(); 57 BUG();
59 break; 58 break;
60 } 59 }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 06d8a8426c16..26b8e709a569 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,9 +18,6 @@
18#ifndef __XFS_ERROR_H__ 18#ifndef __XFS_ERROR_H__
19#define __XFS_ERROR_H__ 19#define __XFS_ERROR_H__
20 20
21#define prdev(fmt,targ,args...) \
22 printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
23
24#define XFS_ERECOVER 1 /* Failure to recover log */ 21#define XFS_ERECOVER 1 /* Failure to recover log */
25#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */ 22#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */
26#define XFS_ENOLOGSPACE 3 /* Reservation too large */ 23#define XFS_ENOLOGSPACE 3 /* Reservation too large */
@@ -182,8 +179,11 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
182struct xfs_mount; 179struct xfs_mount;
183/* PRINTFLIKE4 */ 180/* PRINTFLIKE4 */
184extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, 181extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
185 char *fmt, ...); 182 char *fmt, ...);
186/* PRINTFLIKE3 */ 183/* PRINTFLIKE3 */
187extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); 184extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
188 185
186#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
187 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
188
189#endif /* __XFS_ERROR_H__ */ 189#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ba096f80f48d..14010f1fa82f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -3,15 +3,15 @@
3 * All Rights Reserved. 3 * All Rights Reserved.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as 6 * modify it under the terms of the GNU Lesser General Public License
7 * published by the Free Software Foundation. 7 * as published by the Free Software Foundation.
8 * 8 *
9 * This program is distributed in the hope that it would be useful, 9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU Lesser General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
@@ -65,6 +65,8 @@ struct fsxattr {
65#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ 65#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
66#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ 66#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
67#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ 67#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
68#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
69#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
68#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ 70#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
69 71
70/* 72/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d1236d6f4045..b4d971b01588 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -501,7 +501,7 @@ xfs_reserve_blocks(
501 if (inval == (__uint64_t *)NULL) { 501 if (inval == (__uint64_t *)NULL) {
502 outval->resblks = mp->m_resblks; 502 outval->resblks = mp->m_resblks;
503 outval->resblks_avail = mp->m_resblks_avail; 503 outval->resblks_avail = mp->m_resblks_avail;
504 return(0); 504 return 0;
505 } 505 }
506 506
507 request = *inval; 507 request = *inval;
@@ -537,7 +537,33 @@ xfs_reserve_blocks(
537 outval->resblks = mp->m_resblks; 537 outval->resblks = mp->m_resblks;
538 outval->resblks_avail = mp->m_resblks_avail; 538 outval->resblks_avail = mp->m_resblks_avail;
539 XFS_SB_UNLOCK(mp, s); 539 XFS_SB_UNLOCK(mp, s);
540 return(0); 540 return 0;
541}
542
543void
544xfs_fs_log_dummy(xfs_mount_t *mp)
545{
546 xfs_trans_t *tp;
547 xfs_inode_t *ip;
548
549
550 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
551 atomic_inc(&mp->m_active_trans);
552 if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
553 xfs_trans_cancel(tp, 0);
554 return;
555 }
556
557 ip = mp->m_rootip;
558 xfs_ilock(ip, XFS_ILOCK_EXCL);
559
560 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
561 xfs_trans_ihold(tp, ip);
562 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
563 xfs_trans_set_sync(tp);
564 xfs_trans_commit(tp, 0, NULL);
565
566 xfs_iunlock(ip, XFS_ILOCK_EXCL);
541} 567}
542 568
543int 569int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f32713f14f9a..300d0c9d61ad 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,5 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
26 xfs_fsop_resblks_t *outval); 26 xfs_fsop_resblks_t *outval);
27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
28extern void xfs_fs_log_dummy(xfs_mount_t *mp);
28 29
29#endif /* __XFS_FSOPS_H__ */ 30#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fc19eedbd11b..8e380a1fb79b 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -493,7 +493,6 @@ xfs_iget(
493 493
494retry: 494retry:
495 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) { 495 if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
496 bhv_desc_t *bdp;
497 xfs_inode_t *ip; 496 xfs_inode_t *ip;
498 497
499 vp = LINVFS_GET_VP(inode); 498 vp = LINVFS_GET_VP(inode);
@@ -517,14 +516,12 @@ retry:
517 * to wait for the inode to go away. 516 * to wait for the inode to go away.
518 */ 517 */
519 if (is_bad_inode(inode) || 518 if (is_bad_inode(inode) ||
520 ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), 519 ((ip = xfs_vtoi(vp)) == NULL)) {
521 &xfs_vnodeops)) == NULL)) {
522 iput(inode); 520 iput(inode);
523 delay(1); 521 delay(1);
524 goto retry; 522 goto retry;
525 } 523 }
526 524
527 ip = XFS_BHVTOI(bdp);
528 if (lock_flags != 0) 525 if (lock_flags != 0)
529 xfs_ilock(ip, lock_flags); 526 xfs_ilock(ip, lock_flags);
530 XFS_STATS_INC(xs_ig_found); 527 XFS_STATS_INC(xs_ig_found);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index df0d4572d70a..1d7f5a7e063e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -404,9 +404,8 @@ xfs_iformat(
404 INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + 404 INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
405 INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) > 405 INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
406 INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) { 406 INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
407 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 407 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
408 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu." 408 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
409 " Unmount and run xfs_repair.",
410 (unsigned long long)ip->i_ino, 409 (unsigned long long)ip->i_ino,
411 (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) 410 (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
412 + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)), 411 + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
@@ -418,9 +417,8 @@ xfs_iformat(
418 } 417 }
419 418
420 if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) { 419 if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
421 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 420 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
422 "corrupt dinode %Lu, forkoff = 0x%x." 421 "corrupt dinode %Lu, forkoff = 0x%x.",
423 " Unmount and run xfs_repair.",
424 (unsigned long long)ip->i_ino, 422 (unsigned long long)ip->i_ino,
425 (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT))); 423 (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
426 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 424 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -451,8 +449,9 @@ xfs_iformat(
451 * no local regular files yet 449 * no local regular files yet
452 */ 450 */
453 if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) { 451 if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
454 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 452 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
455 "corrupt inode (local format for regular file) %Lu. Unmount and run xfs_repair.", 453 "corrupt inode %Lu "
454 "(local format for regular file).",
456 (unsigned long long) ip->i_ino); 455 (unsigned long long) ip->i_ino);
457 XFS_CORRUPTION_ERROR("xfs_iformat(4)", 456 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
458 XFS_ERRLEVEL_LOW, 457 XFS_ERRLEVEL_LOW,
@@ -462,8 +461,9 @@ xfs_iformat(
462 461
463 di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT); 462 di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
464 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 463 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
465 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 464 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
466 "corrupt inode %Lu (bad size %Ld for local inode). Unmount and run xfs_repair.", 465 "corrupt inode %Lu "
466 "(bad size %Ld for local inode).",
467 (unsigned long long) ip->i_ino, 467 (unsigned long long) ip->i_ino,
468 (long long) di_size); 468 (long long) di_size);
469 XFS_CORRUPTION_ERROR("xfs_iformat(5)", 469 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -551,8 +551,9 @@ xfs_iformat_local(
551 * kmem_alloc() or memcpy() below. 551 * kmem_alloc() or memcpy() below.
552 */ 552 */
553 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 553 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
554 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 554 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
555 "corrupt inode %Lu (bad size %d for local fork, size = %d). Unmount and run xfs_repair.", 555 "corrupt inode %Lu "
556 "(bad size %d for local fork, size = %d).",
556 (unsigned long long) ip->i_ino, size, 557 (unsigned long long) ip->i_ino, size,
557 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 558 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
558 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 559 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -610,8 +611,8 @@ xfs_iformat_extents(
610 * kmem_alloc() or memcpy() below. 611 * kmem_alloc() or memcpy() below.
611 */ 612 */
612 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 613 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
613 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 614 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
614 "corrupt inode %Lu ((a)extents = %d). Unmount and run xfs_repair.", 615 "corrupt inode %Lu ((a)extents = %d).",
615 (unsigned long long) ip->i_ino, nex); 616 (unsigned long long) ip->i_ino, nex);
616 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 617 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
617 ip->i_mount, dip); 618 ip->i_mount, dip);
@@ -692,8 +693,8 @@ xfs_iformat_btree(
692 || XFS_BMDR_SPACE_CALC(nrecs) > 693 || XFS_BMDR_SPACE_CALC(nrecs) >
693 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 694 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
694 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 695 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
695 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 696 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
696 "corrupt inode %Lu (btree). Unmount and run xfs_repair.", 697 "corrupt inode %Lu (btree).",
697 (unsigned long long) ip->i_ino); 698 (unsigned long long) ip->i_ino);
698 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 699 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
699 ip->i_mount); 700 ip->i_mount);
@@ -809,6 +810,10 @@ _xfs_dic2xflags(
809 flags |= XFS_XFLAG_PROJINHERIT; 810 flags |= XFS_XFLAG_PROJINHERIT;
810 if (di_flags & XFS_DIFLAG_NOSYMLINKS) 811 if (di_flags & XFS_DIFLAG_NOSYMLINKS)
811 flags |= XFS_XFLAG_NOSYMLINKS; 812 flags |= XFS_XFLAG_NOSYMLINKS;
813 if (di_flags & XFS_DIFLAG_EXTSIZE)
814 flags |= XFS_XFLAG_EXTSIZE;
815 if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
816 flags |= XFS_XFLAG_EXTSZINHERIT;
812 } 817 }
813 818
814 return flags; 819 return flags;
@@ -1192,11 +1197,19 @@ xfs_ialloc(
1192 if ((mode & S_IFMT) == S_IFDIR) { 1197 if ((mode & S_IFMT) == S_IFDIR) {
1193 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1198 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1194 di_flags |= XFS_DIFLAG_RTINHERIT; 1199 di_flags |= XFS_DIFLAG_RTINHERIT;
1195 } else { 1200 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1201 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1202 ip->i_d.di_extsize = pip->i_d.di_extsize;
1203 }
1204 } else if ((mode & S_IFMT) == S_IFREG) {
1196 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { 1205 if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
1197 di_flags |= XFS_DIFLAG_REALTIME; 1206 di_flags |= XFS_DIFLAG_REALTIME;
1198 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 1207 ip->i_iocore.io_flags |= XFS_IOCORE_RT;
1199 } 1208 }
1209 if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1210 di_flags |= XFS_DIFLAG_EXTSIZE;
1211 ip->i_d.di_extsize = pip->i_d.di_extsize;
1212 }
1200 } 1213 }
1201 if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 1214 if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
1202 xfs_inherit_noatime) 1215 xfs_inherit_noatime)
@@ -1262,7 +1275,7 @@ xfs_isize_check(
1262 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 1275 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
1263 return; 1276 return;
1264 1277
1265 if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME ) 1278 if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
1266 return; 1279 return;
1267 1280
1268 nimaps = 2; 1281 nimaps = 2;
@@ -1765,22 +1778,19 @@ xfs_igrow_start(
1765 xfs_fsize_t new_size, 1778 xfs_fsize_t new_size,
1766 cred_t *credp) 1779 cred_t *credp)
1767{ 1780{
1768 xfs_fsize_t isize;
1769 int error; 1781 int error;
1770 1782
1771 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1783 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1772 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1784 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1773 ASSERT(new_size > ip->i_d.di_size); 1785 ASSERT(new_size > ip->i_d.di_size);
1774 1786
1775 error = 0;
1776 isize = ip->i_d.di_size;
1777 /* 1787 /*
1778 * Zero any pages that may have been created by 1788 * Zero any pages that may have been created by
1779 * xfs_write_file() beyond the end of the file 1789 * xfs_write_file() beyond the end of the file
1780 * and any blocks between the old and new file sizes. 1790 * and any blocks between the old and new file sizes.
1781 */ 1791 */
1782 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize, 1792 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
1783 new_size); 1793 ip->i_d.di_size, new_size);
1784 return error; 1794 return error;
1785} 1795}
1786 1796
@@ -3355,6 +3365,11 @@ xfs_iflush_int(
3355 ip->i_update_core = 0; 3365 ip->i_update_core = 0;
3356 SYNCHRONIZE(); 3366 SYNCHRONIZE();
3357 3367
3368 /*
3369 * Make sure to get the latest atime from the Linux inode.
3370 */
3371 xfs_synchronize_atime(ip);
3372
3358 if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC, 3373 if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
3359 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 3374 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
3360 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3375 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 124d30e6143b..1cfbcf18ce86 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -436,6 +436,10 @@ void xfs_ichgtime(xfs_inode_t *, int);
436xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 436xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
437void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 437void xfs_lock_inodes(xfs_inode_t **, int, int, uint);
438 438
439xfs_inode_t *xfs_vtoi(struct vnode *vp);
440
441void xfs_synchronize_atime(xfs_inode_t *);
442
439#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 443#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
440 444
441#ifdef DEBUG 445#ifdef DEBUG
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7f3363c621e1..36aa1fcb90a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -271,6 +271,11 @@ xfs_inode_item_format(
271 if (ip->i_update_size) 271 if (ip->i_update_size)
272 ip->i_update_size = 0; 272 ip->i_update_size = 0;
273 273
274 /*
275 * Make sure to get the latest atime from the Linux inode.
276 */
277 xfs_synchronize_atime(ip);
278
274 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 279 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
275 vecp->i_len = sizeof(xfs_dinode_core_t); 280 vecp->i_len = sizeof(xfs_dinode_core_t);
276 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 281 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
@@ -603,7 +608,7 @@ xfs_inode_item_trylock(
603 if (iip->ili_pushbuf_flag == 0) { 608 if (iip->ili_pushbuf_flag == 0) {
604 iip->ili_pushbuf_flag = 1; 609 iip->ili_pushbuf_flag = 1;
605#ifdef DEBUG 610#ifdef DEBUG
606 iip->ili_push_owner = get_thread_id(); 611 iip->ili_push_owner = current_pid();
607#endif 612#endif
608 /* 613 /*
609 * Inode is left locked in shared mode. 614 * Inode is left locked in shared mode.
@@ -782,7 +787,7 @@ xfs_inode_item_pushbuf(
782 * trying to duplicate our effort. 787 * trying to duplicate our effort.
783 */ 788 */
784 ASSERT(iip->ili_pushbuf_flag != 0); 789 ASSERT(iip->ili_pushbuf_flag != 0);
785 ASSERT(iip->ili_push_owner == get_thread_id()); 790 ASSERT(iip->ili_push_owner == current_pid());
786 791
787 /* 792 /*
788 * If flushlock isn't locked anymore, chances are that the 793 * If flushlock isn't locked anymore, chances are that the
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 45a77a3a6c07..788917f355c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -40,7 +40,6 @@
40#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
41#include "xfs_btree.h" 41#include "xfs_btree.h"
42#include "xfs_bmap.h" 42#include "xfs_bmap.h"
43#include "xfs_bit.h"
44#include "xfs_rtalloc.h" 43#include "xfs_rtalloc.h"
45#include "xfs_error.h" 44#include "xfs_error.h"
46#include "xfs_itable.h" 45#include "xfs_itable.h"
@@ -263,7 +262,7 @@ phase2:
263 case BMAPI_WRITE: 262 case BMAPI_WRITE:
264 /* If we found an extent, return it */ 263 /* If we found an extent, return it */
265 if (nimaps && 264 if (nimaps &&
266 (imap.br_startblock != HOLESTARTBLOCK) && 265 (imap.br_startblock != HOLESTARTBLOCK) &&
267 (imap.br_startblock != DELAYSTARTBLOCK)) { 266 (imap.br_startblock != DELAYSTARTBLOCK)) {
268 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io, 267 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
269 offset, count, iomapp, &imap, flags); 268 offset, count, iomapp, &imap, flags);
@@ -318,6 +317,58 @@ out:
318} 317}
319 318
320STATIC int 319STATIC int
320xfs_iomap_eof_align_last_fsb(
321 xfs_mount_t *mp,
322 xfs_iocore_t *io,
323 xfs_fsize_t isize,
324 xfs_extlen_t extsize,
325 xfs_fileoff_t *last_fsb)
326{
327 xfs_fileoff_t new_last_fsb = 0;
328 xfs_extlen_t align;
329 int eof, error;
330
331 if (io->io_flags & XFS_IOCORE_RT)
332 ;
333 /*
334 * If mounted with the "-o swalloc" option, roundup the allocation
335 * request to a stripe width boundary if the file size is >=
336 * stripe width and we are allocating past the allocation eof.
337 */
338 else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
339 (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
340 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
341 /*
342 * Roundup the allocation request to a stripe unit (m_dalign) boundary
343 * if the file size is >= stripe unit size, and we are allocating past
344 * the allocation eof.
345 */
346 else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
347 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
348
349 /*
350 * Always round up the allocation request to an extent boundary
351 * (when file on a real-time subvolume or has di_extsize hint).
352 */
353 if (extsize) {
354 if (new_last_fsb)
355 align = roundup_64(new_last_fsb, extsize);
356 else
357 align = extsize;
358 new_last_fsb = roundup_64(*last_fsb, align);
359 }
360
361 if (new_last_fsb) {
362 error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
363 if (error)
364 return error;
365 if (eof)
366 *last_fsb = new_last_fsb;
367 }
368 return 0;
369}
370
371STATIC int
321xfs_flush_space( 372xfs_flush_space(
322 xfs_inode_t *ip, 373 xfs_inode_t *ip,
323 int *fsynced, 374 int *fsynced,
@@ -363,19 +414,20 @@ xfs_iomap_write_direct(
363 xfs_iocore_t *io = &ip->i_iocore; 414 xfs_iocore_t *io = &ip->i_iocore;
364 xfs_fileoff_t offset_fsb; 415 xfs_fileoff_t offset_fsb;
365 xfs_fileoff_t last_fsb; 416 xfs_fileoff_t last_fsb;
366 xfs_filblks_t count_fsb; 417 xfs_filblks_t count_fsb, resaligned;
367 xfs_fsblock_t firstfsb; 418 xfs_fsblock_t firstfsb;
419 xfs_extlen_t extsz, temp;
420 xfs_fsize_t isize;
368 int nimaps; 421 int nimaps;
369 int error;
370 int bmapi_flag; 422 int bmapi_flag;
371 int quota_flag; 423 int quota_flag;
372 int rt; 424 int rt;
373 xfs_trans_t *tp; 425 xfs_trans_t *tp;
374 xfs_bmbt_irec_t imap; 426 xfs_bmbt_irec_t imap;
375 xfs_bmap_free_t free_list; 427 xfs_bmap_free_t free_list;
376 xfs_filblks_t qblocks, resblks; 428 uint qblocks, resblks, resrtextents;
377 int committed; 429 int committed;
378 int resrtextents; 430 int error;
379 431
380 /* 432 /*
381 * Make sure that the dquots are there. This doesn't hold 433 * Make sure that the dquots are there. This doesn't hold
@@ -385,38 +437,53 @@ xfs_iomap_write_direct(
385 if (error) 437 if (error)
386 return XFS_ERROR(error); 438 return XFS_ERROR(error);
387 439
388 offset_fsb = XFS_B_TO_FSBT(mp, offset); 440 rt = XFS_IS_REALTIME_INODE(ip);
389 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 441 if (unlikely(rt)) {
390 count_fsb = last_fsb - offset_fsb; 442 if (!(extsz = ip->i_d.di_extsize))
391 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) { 443 extsz = mp->m_sb.sb_rextsize;
392 xfs_fileoff_t map_last_fsb; 444 } else {
393 445 extsz = ip->i_d.di_extsize;
394 map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
395 if (map_last_fsb < last_fsb) {
396 last_fsb = map_last_fsb;
397 count_fsb = last_fsb - offset_fsb;
398 }
399 ASSERT(count_fsb > 0);
400 } 446 }
401 447
402 /* 448 isize = ip->i_d.di_size;
403 * Determine if reserving space on the data or realtime partition. 449 if (io->io_new_size > isize)
404 */ 450 isize = io->io_new_size;
405 if ((rt = XFS_IS_REALTIME_INODE(ip))) {
406 xfs_extlen_t extsz;
407 451
408 if (!(extsz = ip->i_d.di_extsize)) 452 offset_fsb = XFS_B_TO_FSBT(mp, offset);
409 extsz = mp->m_sb.sb_rextsize; 453 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
410 resrtextents = qblocks = (count_fsb + extsz - 1); 454 if ((offset + count) > isize) {
411 do_div(resrtextents, mp->m_sb.sb_rextsize); 455 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
412 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 456 &last_fsb);
413 quota_flag = XFS_QMOPT_RES_RTBLKS; 457 if (error)
458 goto error_out;
414 } else { 459 } else {
415 resrtextents = 0; 460 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
416 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb); 461 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
417 quota_flag = XFS_QMOPT_RES_REGBLKS; 462 ret_imap->br_blockcount +
463 ret_imap->br_startoff);
464 }
465 count_fsb = last_fsb - offset_fsb;
466 ASSERT(count_fsb > 0);
467
468 resaligned = count_fsb;
469 if (unlikely(extsz)) {
470 if ((temp = do_mod(offset_fsb, extsz)))
471 resaligned += temp;
472 if ((temp = do_mod(resaligned, extsz)))
473 resaligned += extsz - temp;
418 } 474 }
419 475
476 if (unlikely(rt)) {
477 resrtextents = qblocks = resaligned;
478 resrtextents /= mp->m_sb.sb_rextsize;
479 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
480 quota_flag = XFS_QMOPT_RES_RTBLKS;
481 } else {
482 resrtextents = 0;
483 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
484 quota_flag = XFS_QMOPT_RES_REGBLKS;
485 }
486
420 /* 487 /*
421 * Allocate and setup the transaction 488 * Allocate and setup the transaction
422 */ 489 */
@@ -426,7 +493,6 @@ xfs_iomap_write_direct(
426 XFS_WRITE_LOG_RES(mp), resrtextents, 493 XFS_WRITE_LOG_RES(mp), resrtextents,
427 XFS_TRANS_PERM_LOG_RES, 494 XFS_TRANS_PERM_LOG_RES,
428 XFS_WRITE_LOG_COUNT); 495 XFS_WRITE_LOG_COUNT);
429
430 /* 496 /*
431 * Check for running out of space, note: need lock to return 497 * Check for running out of space, note: need lock to return
432 */ 498 */
@@ -436,20 +502,20 @@ xfs_iomap_write_direct(
436 if (error) 502 if (error)
437 goto error_out; 503 goto error_out;
438 504
439 if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) { 505 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
440 error = (EDQUOT); 506 qblocks, 0, quota_flag);
507 if (error)
441 goto error1; 508 goto error1;
442 }
443 509
444 bmapi_flag = XFS_BMAPI_WRITE;
445 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 510 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
446 xfs_trans_ihold(tp, ip); 511 xfs_trans_ihold(tp, ip);
447 512
448 if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt)) 513 bmapi_flag = XFS_BMAPI_WRITE;
514 if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
449 bmapi_flag |= XFS_BMAPI_PREALLOC; 515 bmapi_flag |= XFS_BMAPI_PREALLOC;
450 516
451 /* 517 /*
452 * Issue the bmapi() call to allocate the blocks 518 * Issue the xfs_bmapi() call to allocate the blocks
453 */ 519 */
454 XFS_BMAP_INIT(&free_list, &firstfsb); 520 XFS_BMAP_INIT(&free_list, &firstfsb);
455 nimaps = 1; 521 nimaps = 1;
@@ -484,8 +550,10 @@ xfs_iomap_write_direct(
484 "extent-state : %x \n", 550 "extent-state : %x \n",
485 (ip->i_mount)->m_fsname, 551 (ip->i_mount)->m_fsname,
486 (long long)ip->i_ino, 552 (long long)ip->i_ino,
487 ret_imap->br_startblock, ret_imap->br_startoff, 553 (unsigned long long)ret_imap->br_startblock,
488 ret_imap->br_blockcount,ret_imap->br_state); 554 (unsigned long long)ret_imap->br_startoff,
555 (unsigned long long)ret_imap->br_blockcount,
556 ret_imap->br_state);
489 } 557 }
490 return 0; 558 return 0;
491 559
@@ -501,6 +569,63 @@ error_out:
501 return XFS_ERROR(error); 569 return XFS_ERROR(error);
502} 570}
503 571
572/*
573 * If the caller is doing a write at the end of the file,
574 * then extend the allocation out to the file system's write
575 * iosize. We clean up any extra space left over when the
576 * file is closed in xfs_inactive().
577 *
578 * For sync writes, we are flushing delayed allocate space to
579 * try to make additional space available for allocation near
580 * the filesystem full boundary - preallocation hurts in that
581 * situation, of course.
582 */
583STATIC int
584xfs_iomap_eof_want_preallocate(
585 xfs_mount_t *mp,
586 xfs_iocore_t *io,
587 xfs_fsize_t isize,
588 xfs_off_t offset,
589 size_t count,
590 int ioflag,
591 xfs_bmbt_irec_t *imap,
592 int nimaps,
593 int *prealloc)
594{
595 xfs_fileoff_t start_fsb;
596 xfs_filblks_t count_fsb;
597 xfs_fsblock_t firstblock;
598 int n, error, imaps;
599
600 *prealloc = 0;
601 if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
602 return 0;
603
604 /*
605 * If there are any real blocks past eof, then don't
606 * do any speculative allocation.
607 */
608 start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
609 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
610 while (count_fsb > 0) {
611 imaps = nimaps;
612 firstblock = NULLFSBLOCK;
613 error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
614 0, &firstblock, 0, imap, &imaps, NULL);
615 if (error)
616 return error;
617 for (n = 0; n < imaps; n++) {
618 if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
619 (imap[n].br_startblock != DELAYSTARTBLOCK))
620 return 0;
621 start_fsb += imap[n].br_blockcount;
622 count_fsb -= imap[n].br_blockcount;
623 }
624 }
625 *prealloc = 1;
626 return 0;
627}
628
504int 629int
505xfs_iomap_write_delay( 630xfs_iomap_write_delay(
506 xfs_inode_t *ip, 631 xfs_inode_t *ip,
@@ -514,13 +639,15 @@ xfs_iomap_write_delay(
514 xfs_iocore_t *io = &ip->i_iocore; 639 xfs_iocore_t *io = &ip->i_iocore;
515 xfs_fileoff_t offset_fsb; 640 xfs_fileoff_t offset_fsb;
516 xfs_fileoff_t last_fsb; 641 xfs_fileoff_t last_fsb;
517 xfs_fsize_t isize; 642 xfs_off_t aligned_offset;
643 xfs_fileoff_t ioalign;
518 xfs_fsblock_t firstblock; 644 xfs_fsblock_t firstblock;
645 xfs_extlen_t extsz;
646 xfs_fsize_t isize;
519 int nimaps; 647 int nimaps;
520 int error;
521 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; 648 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
522 int aeof; 649 int prealloc, fsynced = 0;
523 int fsynced = 0; 650 int error;
524 651
525 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 652 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
526 653
@@ -528,152 +655,57 @@ xfs_iomap_write_delay(
528 * Make sure that the dquots are there. This doesn't hold 655 * Make sure that the dquots are there. This doesn't hold
529 * the ilock across a disk read. 656 * the ilock across a disk read.
530 */ 657 */
531
532 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); 658 error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
533 if (error) 659 if (error)
534 return XFS_ERROR(error); 660 return XFS_ERROR(error);
535 661
662 if (XFS_IS_REALTIME_INODE(ip)) {
663 if (!(extsz = ip->i_d.di_extsize))
664 extsz = mp->m_sb.sb_rextsize;
665 } else {
666 extsz = ip->i_d.di_extsize;
667 }
668
669 offset_fsb = XFS_B_TO_FSBT(mp, offset);
670
536retry: 671retry:
537 isize = ip->i_d.di_size; 672 isize = ip->i_d.di_size;
538 if (io->io_new_size > isize) { 673 if (io->io_new_size > isize)
539 isize = io->io_new_size; 674 isize = io->io_new_size;
540 }
541 675
542 aeof = 0; 676 error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
543 offset_fsb = XFS_B_TO_FSBT(mp, offset); 677 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
544 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 678 if (error)
545 /* 679 return error;
546 * If the caller is doing a write at the end of the file,
547 * then extend the allocation (and the buffer used for the write)
548 * out to the file system's write iosize. We clean up any extra
549 * space left over when the file is closed in xfs_inactive().
550 *
551 * For sync writes, we are flushing delayed allocate space to
552 * try to make additional space available for allocation near
553 * the filesystem full boundary - preallocation hurts in that
554 * situation, of course.
555 */
556 if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
557 xfs_off_t aligned_offset;
558 xfs_filblks_t count_fsb;
559 unsigned int iosize;
560 xfs_fileoff_t ioalign;
561 int n;
562 xfs_fileoff_t start_fsb;
563 680
564 /* 681 if (prealloc) {
565 * If there are any real blocks past eof, then don't
566 * do any speculative allocation.
567 */
568 start_fsb = XFS_B_TO_FSBT(mp,
569 ((xfs_ufsize_t)(offset + count - 1)));
570 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
571 while (count_fsb > 0) {
572 nimaps = XFS_WRITE_IMAPS;
573 error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
574 0, &firstblock, 0, imap, &nimaps, NULL);
575 if (error) {
576 return error;
577 }
578 for (n = 0; n < nimaps; n++) {
579 if ( !(io->io_flags & XFS_IOCORE_RT) &&
580 !imap[n].br_startblock) {
581 cmn_err(CE_PANIC,"Access to block "
582 "zero: fs <%s> inode: %lld "
583 "start_block : %llx start_off "
584 ": %llx blkcnt : %llx "
585 "extent-state : %x \n",
586 (ip->i_mount)->m_fsname,
587 (long long)ip->i_ino,
588 imap[n].br_startblock,
589 imap[n].br_startoff,
590 imap[n].br_blockcount,
591 imap[n].br_state);
592 }
593 if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
594 (imap[n].br_startblock != DELAYSTARTBLOCK)) {
595 goto write_map;
596 }
597 start_fsb += imap[n].br_blockcount;
598 count_fsb -= imap[n].br_blockcount;
599 }
600 }
601 iosize = mp->m_writeio_blocks;
602 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 682 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
603 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 683 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
604 last_fsb = ioalign + iosize; 684 last_fsb = ioalign + mp->m_writeio_blocks;
605 aeof = 1; 685 } else {
686 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
606 } 687 }
607write_map:
608 nimaps = XFS_WRITE_IMAPS;
609 firstblock = NULLFSBLOCK;
610 688
611 /* 689 if (prealloc || extsz) {
612 * If mounted with the "-o swalloc" option, roundup the allocation 690 error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
613 * request to a stripe width boundary if the file size is >= 691 &last_fsb);
614 * stripe width and we are allocating past the allocation eof. 692 if (error)
615 */
616 if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth
617 && (mp->m_flags & XFS_MOUNT_SWALLOC)
618 && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
619 int eof;
620 xfs_fileoff_t new_last_fsb;
621
622 new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
623 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
624 if (error) {
625 return error;
626 }
627 if (eof) {
628 last_fsb = new_last_fsb;
629 }
630 /*
631 * Roundup the allocation request to a stripe unit (m_dalign) boundary
632 * if the file size is >= stripe unit size, and we are allocating past
633 * the allocation eof.
634 */
635 } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
636 (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
637 int eof;
638 xfs_fileoff_t new_last_fsb;
639 new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
640 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
641 if (error) {
642 return error;
643 }
644 if (eof) {
645 last_fsb = new_last_fsb;
646 }
647 /*
648 * Round up the allocation request to a real-time extent boundary
649 * if the file is on the real-time subvolume.
650 */
651 } else if (io->io_flags & XFS_IOCORE_RT && aeof) {
652 int eof;
653 xfs_fileoff_t new_last_fsb;
654
655 new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
656 error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
657 if (error) {
658 return error; 693 return error;
659 }
660 if (eof)
661 last_fsb = new_last_fsb;
662 } 694 }
695
696 nimaps = XFS_WRITE_IMAPS;
697 firstblock = NULLFSBLOCK;
663 error = xfs_bmapi(NULL, ip, offset_fsb, 698 error = xfs_bmapi(NULL, ip, offset_fsb,
664 (xfs_filblks_t)(last_fsb - offset_fsb), 699 (xfs_filblks_t)(last_fsb - offset_fsb),
665 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | 700 XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
666 XFS_BMAPI_ENTIRE, &firstblock, 1, imap, 701 XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
667 &nimaps, NULL); 702 &nimaps, NULL);
668 /* 703 if (error && (error != ENOSPC))
669 * This can be EDQUOT, if nimaps == 0
670 */
671 if (error && (error != ENOSPC)) {
672 return XFS_ERROR(error); 704 return XFS_ERROR(error);
673 } 705
674 /* 706 /*
675 * If bmapi returned us nothing, and if we didn't get back EDQUOT, 707 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
676 * then we must have run out of space. 708 * then we must have run out of space - flush delalloc, and retry..
677 */ 709 */
678 if (nimaps == 0) { 710 if (nimaps == 0) {
679 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, 711 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
@@ -685,17 +717,21 @@ write_map:
685 goto retry; 717 goto retry;
686 } 718 }
687 719
688 *ret_imap = imap[0]; 720 if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
689 *nmaps = 1;
690 if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
691 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld " 721 cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
692 "start_block : %llx start_off : %llx blkcnt : %llx " 722 "start_block : %llx start_off : %llx blkcnt : %llx "
693 "extent-state : %x \n", 723 "extent-state : %x \n",
694 (ip->i_mount)->m_fsname, 724 (ip->i_mount)->m_fsname,
695 (long long)ip->i_ino, 725 (long long)ip->i_ino,
696 ret_imap->br_startblock, ret_imap->br_startoff, 726 (unsigned long long)ret_imap->br_startblock,
697 ret_imap->br_blockcount,ret_imap->br_state); 727 (unsigned long long)ret_imap->br_startoff,
728 (unsigned long long)ret_imap->br_blockcount,
729 ret_imap->br_state);
698 } 730 }
731
732 *ret_imap = imap[0];
733 *nmaps = 1;
734
699 return 0; 735 return 0;
700} 736}
701 737
@@ -821,17 +857,21 @@ xfs_iomap_write_allocate(
821 */ 857 */
822 858
823 for (i = 0; i < nimaps; i++) { 859 for (i = 0; i < nimaps; i++) {
824 if ( !(io->io_flags & XFS_IOCORE_RT) && 860 if (!(io->io_flags & XFS_IOCORE_RT) &&
825 !imap[i].br_startblock) { 861 !imap[i].br_startblock) {
826 cmn_err(CE_PANIC,"Access to block zero: " 862 cmn_err(CE_PANIC,"Access to block zero: "
827 "fs <%s> inode: %lld " 863 "fs <%s> inode: %lld "
828 "start_block : %llx start_off : %llx " 864 "start_block : %llx start_off : %llx "
829 "blkcnt : %llx extent-state : %x \n", 865 "blkcnt : %llx extent-state : %x \n",
830 (ip->i_mount)->m_fsname, 866 (ip->i_mount)->m_fsname,
831 (long long)ip->i_ino, 867 (long long)ip->i_ino,
832 imap[i].br_startblock, 868 (unsigned long long)
833 imap[i].br_startoff, 869 imap[i].br_startblock,
834 imap[i].br_blockcount,imap[i].br_state); 870 (unsigned long long)
871 imap[i].br_startoff,
872 (unsigned long long)
873 imap[i].br_blockcount,
874 imap[i].br_state);
835 } 875 }
836 if ((offset_fsb >= imap[i].br_startoff) && 876 if ((offset_fsb >= imap[i].br_startoff) &&
837 (offset_fsb < (imap[i].br_startoff + 877 (offset_fsb < (imap[i].br_startoff +
@@ -868,17 +908,17 @@ xfs_iomap_write_unwritten(
868{ 908{
869 xfs_mount_t *mp = ip->i_mount; 909 xfs_mount_t *mp = ip->i_mount;
870 xfs_iocore_t *io = &ip->i_iocore; 910 xfs_iocore_t *io = &ip->i_iocore;
871 xfs_trans_t *tp;
872 xfs_fileoff_t offset_fsb; 911 xfs_fileoff_t offset_fsb;
873 xfs_filblks_t count_fsb; 912 xfs_filblks_t count_fsb;
874 xfs_filblks_t numblks_fsb; 913 xfs_filblks_t numblks_fsb;
875 xfs_bmbt_irec_t imap; 914 xfs_fsblock_t firstfsb;
915 int nimaps;
916 xfs_trans_t *tp;
917 xfs_bmbt_irec_t imap;
918 xfs_bmap_free_t free_list;
919 uint resblks;
876 int committed; 920 int committed;
877 int error; 921 int error;
878 int nres;
879 int nimaps;
880 xfs_fsblock_t firstfsb;
881 xfs_bmap_free_t free_list;
882 922
883 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, 923 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
884 &ip->i_iocore, offset, count); 924 &ip->i_iocore, offset, count);
@@ -887,9 +927,9 @@ xfs_iomap_write_unwritten(
887 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 927 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
888 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); 928 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
889 929
890 do { 930 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
891 nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);
892 931
932 do {
893 /* 933 /*
894 * set up a transaction to convert the range of extents 934 * set up a transaction to convert the range of extents
895 * from unwritten to real. Do allocations in a loop until 935 * from unwritten to real. Do allocations in a loop until
@@ -897,7 +937,7 @@ xfs_iomap_write_unwritten(
897 */ 937 */
898 938
899 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 939 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
900 error = xfs_trans_reserve(tp, nres, 940 error = xfs_trans_reserve(tp, resblks,
901 XFS_WRITE_LOG_RES(mp), 0, 941 XFS_WRITE_LOG_RES(mp), 0,
902 XFS_TRANS_PERM_LOG_RES, 942 XFS_TRANS_PERM_LOG_RES,
903 XFS_WRITE_LOG_COUNT); 943 XFS_WRITE_LOG_COUNT);
@@ -916,7 +956,7 @@ xfs_iomap_write_unwritten(
916 XFS_BMAP_INIT(&free_list, &firstfsb); 956 XFS_BMAP_INIT(&free_list, &firstfsb);
917 nimaps = 1; 957 nimaps = 1;
918 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, 958 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
919 XFS_BMAPI_WRITE, &firstfsb, 959 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
920 1, &imap, &nimaps, &free_list); 960 1, &imap, &nimaps, &free_list);
921 if (error) 961 if (error)
922 goto error_on_bmapi_transaction; 962 goto error_on_bmapi_transaction;
@@ -930,15 +970,17 @@ xfs_iomap_write_unwritten(
930 xfs_iunlock(ip, XFS_ILOCK_EXCL); 970 xfs_iunlock(ip, XFS_ILOCK_EXCL);
931 if (error) 971 if (error)
932 goto error0; 972 goto error0;
933 973
934 if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) { 974 if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) {
935 cmn_err(CE_PANIC,"Access to block zero: fs <%s> " 975 cmn_err(CE_PANIC,"Access to block zero: fs <%s> "
936 "inode: %lld start_block : %llx start_off : " 976 "inode: %lld start_block : %llx start_off : "
937 "%llx blkcnt : %llx extent-state : %x \n", 977 "%llx blkcnt : %llx extent-state : %x \n",
938 (ip->i_mount)->m_fsname, 978 (ip->i_mount)->m_fsname,
939 (long long)ip->i_ino, 979 (long long)ip->i_ino,
940 imap.br_startblock,imap.br_startoff, 980 (unsigned long long)imap.br_startblock,
941 imap.br_blockcount,imap.br_state); 981 (unsigned long long)imap.br_startoff,
982 (unsigned long long)imap.br_blockcount,
983 imap.br_state);
942 } 984 }
943 985
944 if ((numblks_fsb = imap.br_blockcount) == 0) { 986 if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f63646ead816..c59450e1be40 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -56,6 +56,7 @@ xfs_bulkstat_one_iget(
56{ 56{
57 xfs_dinode_core_t *dic; /* dinode core info pointer */ 57 xfs_dinode_core_t *dic; /* dinode core info pointer */
58 xfs_inode_t *ip; /* incore inode pointer */ 58 xfs_inode_t *ip; /* incore inode pointer */
59 vnode_t *vp;
59 int error; 60 int error;
60 61
61 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno); 62 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -72,6 +73,7 @@ xfs_bulkstat_one_iget(
72 goto out_iput; 73 goto out_iput;
73 } 74 }
74 75
76 vp = XFS_ITOV(ip);
75 dic = &ip->i_d; 77 dic = &ip->i_d;
76 78
77 /* xfs_iget returns the following without needing 79 /* xfs_iget returns the following without needing
@@ -84,8 +86,7 @@ xfs_bulkstat_one_iget(
84 buf->bs_uid = dic->di_uid; 86 buf->bs_uid = dic->di_uid;
85 buf->bs_gid = dic->di_gid; 87 buf->bs_gid = dic->di_gid;
86 buf->bs_size = dic->di_size; 88 buf->bs_size = dic->di_size;
87 buf->bs_atime.tv_sec = dic->di_atime.t_sec; 89 vn_atime_to_bstime(vp, &buf->bs_atime);
88 buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
89 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; 90 buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
90 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; 91 buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
91 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; 92 buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 29af51275ca9..9176995160ed 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -178,6 +178,83 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
178#define xlog_trace_iclog(iclog,state) 178#define xlog_trace_iclog(iclog,state)
179#endif /* XFS_LOG_TRACE */ 179#endif /* XFS_LOG_TRACE */
180 180
181
/*
 * Insert ticket @tic into the circular, doubly linked ticket queue whose
 * head pointer is *@qp.
 *
 * Non-empty queue: @tic is linked in immediately before the head element,
 * i.e. after the current last element; the head pointer is left unchanged.
 * Empty queue (*@qp is NULL): @tic is linked to itself and becomes the head.
 *
 * In both cases XLOG_TIC_IN_Q is set in tic->t_flags.
 */
182static void
183xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
184{
185 if (*qp) {
186 tic->t_next = (*qp);
187 tic->t_prev = (*qp)->t_prev;
188 (*qp)->t_prev->t_next = tic; /* former last element now points at tic */
189 (*qp)->t_prev = tic; /* tic is the new predecessor of the head */
190 } else {
191 tic->t_prev = tic->t_next = tic; /* single-element ring */
192 *qp = tic;
193 }
194
195 tic->t_flags |= XLOG_TIC_IN_Q;
196}
197
/*
 * Remove ticket @tic from the circular, doubly linked ticket queue whose
 * head pointer is *@qp.
 *
 * If @tic is the only element (its t_next points back at itself) the queue
 * becomes empty and *@qp is set to NULL.  Otherwise *@qp is repointed at
 * @tic's successor and @tic's neighbours are linked to each other.
 *
 * Afterwards @tic's link pointers are cleared and XLOG_TIC_IN_Q is removed
 * from tic->t_flags.
 */
198static void
199xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
200{
201 if (tic == tic->t_next) {
202 *qp = NULL;
203 } else {
204 *qp = tic->t_next; /* done unconditionally, whether or not tic was the head */
205 tic->t_next->t_prev = tic->t_prev;
206 tic->t_prev->t_next = tic->t_next;
207 }
208
209 tic->t_next = tic->t_prev = NULL;
210 tic->t_flags &= ~XLOG_TIC_IN_Q;
211}
212
/*
 * Move both grant heads of @log (write and reserve) backwards by @bytes.
 *
 * Each head is a (cycle, bytes) pair.  When subtracting makes a byte
 * counter negative, l_logsize is added back to it and the matching cycle
 * counter is decremented.
 */
213static void
214xlog_grant_sub_space(struct log *log, int bytes)
215{
216 log->l_grant_write_bytes -= bytes;
217 if (log->l_grant_write_bytes < 0) {
218 log->l_grant_write_bytes += log->l_logsize; /* wrap into the previous cycle */
219 log->l_grant_write_cycle--;
220 }
221
222 log->l_grant_reserve_bytes -= bytes;
223 if ((log)->l_grant_reserve_bytes < 0) {
224 log->l_grant_reserve_bytes += log->l_logsize; /* wrap into the previous cycle */
225 log->l_grant_reserve_cycle--;
226 }
227
228}
229
/*
 * Move the write grant head of @log forwards by @bytes.
 *
 * When the byte counter becomes strictly greater than l_logsize, l_logsize
 * is subtracted from it and l_grant_write_cycle is incremented.  A counter
 * exactly equal to l_logsize is left as it is.
 */
230static void
231xlog_grant_add_space_write(struct log *log, int bytes)
232{
233 log->l_grant_write_bytes += bytes;
234 if (log->l_grant_write_bytes > log->l_logsize) {
235 log->l_grant_write_bytes -= log->l_logsize; /* wrap into the next cycle */
236 log->l_grant_write_cycle++;
237 }
238}
239
/*
 * Move the reserve grant head of @log forwards by @bytes.
 *
 * When the byte counter becomes strictly greater than l_logsize, l_logsize
 * is subtracted from it and l_grant_reserve_cycle is incremented.  A
 * counter exactly equal to l_logsize is left as it is.
 */
240static void
241xlog_grant_add_space_reserve(struct log *log, int bytes)
242{
243 log->l_grant_reserve_bytes += bytes;
244 if (log->l_grant_reserve_bytes > log->l_logsize) {
245 log->l_grant_reserve_bytes -= log->l_logsize; /* wrap into the next cycle */
246 log->l_grant_reserve_cycle++;
247 }
248}
249
/*
 * Move both grant heads of @log forwards by @bytes: first the write head,
 * then the reserve head.
 */
250static inline void
251xlog_grant_add_space(struct log *log, int bytes)
252{
253 xlog_grant_add_space_write(log, bytes);
254 xlog_grant_add_space_reserve(log, bytes);
255}
256
257
181/* 258/*
182 * NOTES: 259 * NOTES:
183 * 260 *
@@ -326,7 +403,7 @@ xfs_log_release_iclog(xfs_mount_t *mp,
326 403
327 if (xlog_state_release_iclog(log, iclog)) { 404 if (xlog_state_release_iclog(log, iclog)) {
328 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); 405 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
329 return(EIO); 406 return EIO;
330 } 407 }
331 408
332 return 0; 409 return 0;
@@ -428,7 +505,7 @@ xfs_log_mount(xfs_mount_t *mp,
428 if (readonly) 505 if (readonly)
429 vfsp->vfs_flag &= ~VFS_RDONLY; 506 vfsp->vfs_flag &= ~VFS_RDONLY;
430 507
431 error = xlog_recover(mp->m_log, readonly); 508 error = xlog_recover(mp->m_log);
432 509
433 if (readonly) 510 if (readonly)
434 vfsp->vfs_flag |= VFS_RDONLY; 511 vfsp->vfs_flag |= VFS_RDONLY;
@@ -479,7 +556,7 @@ xfs_log_unmount(xfs_mount_t *mp)
479 556
480 error = xfs_log_unmount_write(mp); 557 error = xfs_log_unmount_write(mp);
481 xfs_log_unmount_dealloc(mp); 558 xfs_log_unmount_dealloc(mp);
482 return (error); 559 return error;
483} 560}
484 561
485/* 562/*
@@ -651,7 +728,7 @@ xfs_log_write(xfs_mount_t * mp,
651 if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { 728 if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
652 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); 729 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
653 } 730 }
654 return (error); 731 return error;
655} /* xfs_log_write */ 732} /* xfs_log_write */
656 733
657 734
@@ -759,7 +836,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
759 needed = 1; 836 needed = 1;
760 } 837 }
761 LOG_UNLOCK(log, s); 838 LOG_UNLOCK(log, s);
762 return(needed); 839 return needed;
763} 840}
764 841
765/****************************************************************************** 842/******************************************************************************
@@ -926,7 +1003,7 @@ xlog_bdstrat_cb(struct xfs_buf *bp)
926 XFS_BUF_ERROR(bp, EIO); 1003 XFS_BUF_ERROR(bp, EIO);
927 XFS_BUF_STALE(bp); 1004 XFS_BUF_STALE(bp);
928 xfs_biodone(bp); 1005 xfs_biodone(bp);
929 return (XFS_ERROR(EIO)); 1006 return XFS_ERROR(EIO);
930 1007
931 1008
932} 1009}
@@ -1186,7 +1263,7 @@ xlog_commit_record(xfs_mount_t *mp,
1186 iclog, XLOG_COMMIT_TRANS))) { 1263 iclog, XLOG_COMMIT_TRANS))) {
1187 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR); 1264 xfs_force_shutdown(mp, XFS_LOG_IO_ERROR);
1188 } 1265 }
1189 return (error); 1266 return error;
1190} /* xlog_commit_record */ 1267} /* xlog_commit_record */
1191 1268
1192 1269
@@ -1320,8 +1397,7 @@ xlog_sync(xlog_t *log,
1320 1397
1321 /* move grant heads by roundoff in sync */ 1398 /* move grant heads by roundoff in sync */
1322 s = GRANT_LOCK(log); 1399 s = GRANT_LOCK(log);
1323 XLOG_GRANT_ADD_SPACE(log, roundoff, 'w'); 1400 xlog_grant_add_space(log, roundoff);
1324 XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
1325 GRANT_UNLOCK(log, s); 1401 GRANT_UNLOCK(log, s);
1326 1402
1327 /* put cycle number in every block */ 1403 /* put cycle number in every block */
@@ -1384,7 +1460,7 @@ xlog_sync(xlog_t *log,
1384 if ((error = XFS_bwrite(bp))) { 1460 if ((error = XFS_bwrite(bp))) {
1385 xfs_ioerror_alert("xlog_sync", log->l_mp, bp, 1461 xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
1386 XFS_BUF_ADDR(bp)); 1462 XFS_BUF_ADDR(bp));
1387 return (error); 1463 return error;
1388 } 1464 }
1389 if (split) { 1465 if (split) {
1390 bp = iclog->ic_log->l_xbuf; 1466 bp = iclog->ic_log->l_xbuf;
@@ -1422,10 +1498,10 @@ xlog_sync(xlog_t *log,
1422 if ((error = XFS_bwrite(bp))) { 1498 if ((error = XFS_bwrite(bp))) {
1423 xfs_ioerror_alert("xlog_sync (split)", log->l_mp, 1499 xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
1424 bp, XFS_BUF_ADDR(bp)); 1500 bp, XFS_BUF_ADDR(bp));
1425 return (error); 1501 return error;
1426 } 1502 }
1427 } 1503 }
1428 return (0); 1504 return 0;
1429} /* xlog_sync */ 1505} /* xlog_sync */
1430 1506
1431 1507
@@ -1515,7 +1591,6 @@ xlog_state_finish_copy(xlog_t *log,
1515 * print out info relating to regions written which consume 1591 * print out info relating to regions written which consume
1516 * the reservation 1592 * the reservation
1517 */ 1593 */
1518#if defined(XFS_LOG_RES_DEBUG)
1519STATIC void 1594STATIC void
1520xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) 1595xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1521{ 1596{
@@ -1605,11 +1680,11 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1605 ticket->t_res_arr_sum, ticket->t_res_o_flow, 1680 ticket->t_res_arr_sum, ticket->t_res_o_flow,
1606 ticket->t_res_num_ophdrs, ophdr_spc, 1681 ticket->t_res_num_ophdrs, ophdr_spc,
1607 ticket->t_res_arr_sum + 1682 ticket->t_res_arr_sum +
1608 ticket->t_res_o_flow + ophdr_spc, 1683 ticket->t_res_o_flow + ophdr_spc,
1609 ticket->t_res_num); 1684 ticket->t_res_num);
1610 1685
1611 for (i = 0; i < ticket->t_res_num; i++) { 1686 for (i = 0; i < ticket->t_res_num; i++) {
1612 uint r_type = ticket->t_res_arr[i].r_type; 1687 uint r_type = ticket->t_res_arr[i].r_type;
1613 cmn_err(CE_WARN, 1688 cmn_err(CE_WARN,
1614 "region[%u]: %s - %u bytes\n", 1689 "region[%u]: %s - %u bytes\n",
1615 i, 1690 i,
@@ -1618,9 +1693,6 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1618 ticket->t_res_arr[i].r_len); 1693 ticket->t_res_arr[i].r_len);
1619 } 1694 }
1620} 1695}
1621#else
1622#define xlog_print_tic_res(mp, ticket)
1623#endif
1624 1696
1625/* 1697/*
1626 * Write some region out to in-core log 1698 * Write some region out to in-core log
@@ -1726,7 +1798,7 @@ xlog_write(xfs_mount_t * mp,
1726 for (index = 0; index < nentries; ) { 1798 for (index = 0; index < nentries; ) {
1727 if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket, 1799 if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
1728 &contwr, &log_offset))) 1800 &contwr, &log_offset)))
1729 return (error); 1801 return error;
1730 1802
1731 ASSERT(log_offset <= iclog->ic_size - 1); 1803 ASSERT(log_offset <= iclog->ic_size - 1);
1732 ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset); 1804 ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset);
@@ -1831,7 +1903,7 @@ xlog_write(xfs_mount_t * mp,
1831 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); 1903 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1832 record_cnt = data_cnt = 0; 1904 record_cnt = data_cnt = 0;
1833 if ((error = xlog_state_release_iclog(log, iclog))) 1905 if ((error = xlog_state_release_iclog(log, iclog)))
1834 return (error); 1906 return error;
1835 break; /* don't increment index */ 1907 break; /* don't increment index */
1836 } else { /* copied entire region */ 1908 } else { /* copied entire region */
1837 index++; 1909 index++;
@@ -1845,7 +1917,7 @@ xlog_write(xfs_mount_t * mp,
1845 ASSERT(flags & XLOG_COMMIT_TRANS); 1917 ASSERT(flags & XLOG_COMMIT_TRANS);
1846 *commit_iclog = iclog; 1918 *commit_iclog = iclog;
1847 } else if ((error = xlog_state_release_iclog(log, iclog))) 1919 } else if ((error = xlog_state_release_iclog(log, iclog)))
1848 return (error); 1920 return error;
1849 if (index == nentries) 1921 if (index == nentries)
1850 return 0; /* we are done */ 1922 return 0; /* we are done */
1851 else 1923 else
@@ -1862,7 +1934,7 @@ xlog_write(xfs_mount_t * mp,
1862 *commit_iclog = iclog; 1934 *commit_iclog = iclog;
1863 return 0; 1935 return 0;
1864 } 1936 }
1865 return (xlog_state_release_iclog(log, iclog)); 1937 return xlog_state_release_iclog(log, iclog);
1866} /* xlog_write */ 1938} /* xlog_write */
1867 1939
1868 1940
@@ -1978,7 +2050,7 @@ xlog_get_lowest_lsn(
1978 } 2050 }
1979 lsn_log = lsn_log->ic_next; 2051 lsn_log = lsn_log->ic_next;
1980 } while (lsn_log != log->l_iclog); 2052 } while (lsn_log != log->l_iclog);
1981 return(lowest_lsn); 2053 return lowest_lsn;
1982} 2054}
1983 2055
1984 2056
@@ -2330,7 +2402,7 @@ restart:
2330 if (iclog->ic_refcnt == 1) { 2402 if (iclog->ic_refcnt == 1) {
2331 LOG_UNLOCK(log, s); 2403 LOG_UNLOCK(log, s);
2332 if ((error = xlog_state_release_iclog(log, iclog))) 2404 if ((error = xlog_state_release_iclog(log, iclog)))
2333 return (error); 2405 return error;
2334 } else { 2406 } else {
2335 iclog->ic_refcnt--; 2407 iclog->ic_refcnt--;
2336 LOG_UNLOCK(log, s); 2408 LOG_UNLOCK(log, s);
@@ -2389,7 +2461,7 @@ xlog_grant_log_space(xlog_t *log,
2389 2461
2390 /* something is already sleeping; insert new transaction at end */ 2462 /* something is already sleeping; insert new transaction at end */
2391 if (log->l_reserve_headq) { 2463 if (log->l_reserve_headq) {
2392 XLOG_INS_TICKETQ(log->l_reserve_headq, tic); 2464 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2393 xlog_trace_loggrant(log, tic, 2465 xlog_trace_loggrant(log, tic,
2394 "xlog_grant_log_space: sleep 1"); 2466 "xlog_grant_log_space: sleep 1");
2395 /* 2467 /*
@@ -2422,7 +2494,7 @@ redo:
2422 log->l_grant_reserve_bytes); 2494 log->l_grant_reserve_bytes);
2423 if (free_bytes < need_bytes) { 2495 if (free_bytes < need_bytes) {
2424 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2496 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2425 XLOG_INS_TICKETQ(log->l_reserve_headq, tic); 2497 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2426 xlog_trace_loggrant(log, tic, 2498 xlog_trace_loggrant(log, tic,
2427 "xlog_grant_log_space: sleep 2"); 2499 "xlog_grant_log_space: sleep 2");
2428 XFS_STATS_INC(xs_sleep_logspace); 2500 XFS_STATS_INC(xs_sleep_logspace);
@@ -2439,11 +2511,10 @@ redo:
2439 s = GRANT_LOCK(log); 2511 s = GRANT_LOCK(log);
2440 goto redo; 2512 goto redo;
2441 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2513 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2442 XLOG_DEL_TICKETQ(log->l_reserve_headq, tic); 2514 xlog_del_ticketq(&log->l_reserve_headq, tic);
2443 2515
2444 /* we've got enough space */ 2516 /* we've got enough space */
2445 XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); 2517 xlog_grant_add_space(log, need_bytes);
2446 XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
2447#ifdef DEBUG 2518#ifdef DEBUG
2448 tail_lsn = log->l_tail_lsn; 2519 tail_lsn = log->l_tail_lsn;
2449 /* 2520 /*
@@ -2464,7 +2535,7 @@ redo:
2464 2535
2465 error_return: 2536 error_return:
2466 if (tic->t_flags & XLOG_TIC_IN_Q) 2537 if (tic->t_flags & XLOG_TIC_IN_Q)
2467 XLOG_DEL_TICKETQ(log->l_reserve_headq, tic); 2538 xlog_del_ticketq(&log->l_reserve_headq, tic);
2468 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret"); 2539 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
2469 /* 2540 /*
2470 * If we are failing, make sure the ticket doesn't have any 2541 * If we are failing, make sure the ticket doesn't have any
@@ -2498,7 +2569,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2498 XLOG_TIC_RESET_RES(tic); 2569 XLOG_TIC_RESET_RES(tic);
2499 2570
2500 if (tic->t_cnt > 0) 2571 if (tic->t_cnt > 0)
2501 return (0); 2572 return 0;
2502 2573
2503#ifdef DEBUG 2574#ifdef DEBUG
2504 if (log->l_flags & XLOG_ACTIVE_RECOVERY) 2575 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
@@ -2533,7 +2604,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2533 2604
2534 if (ntic != log->l_write_headq) { 2605 if (ntic != log->l_write_headq) {
2535 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2606 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2536 XLOG_INS_TICKETQ(log->l_write_headq, tic); 2607 xlog_ins_ticketq(&log->l_write_headq, tic);
2537 2608
2538 xlog_trace_loggrant(log, tic, 2609 xlog_trace_loggrant(log, tic,
2539 "xlog_regrant_write_log_space: sleep 1"); 2610 "xlog_regrant_write_log_space: sleep 1");
@@ -2565,7 +2636,7 @@ redo:
2565 log->l_grant_write_bytes); 2636 log->l_grant_write_bytes);
2566 if (free_bytes < need_bytes) { 2637 if (free_bytes < need_bytes) {
2567 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2638 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2568 XLOG_INS_TICKETQ(log->l_write_headq, tic); 2639 xlog_ins_ticketq(&log->l_write_headq, tic);
2569 XFS_STATS_INC(xs_sleep_logspace); 2640 XFS_STATS_INC(xs_sleep_logspace);
2570 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); 2641 sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
2571 2642
@@ -2581,9 +2652,10 @@ redo:
2581 s = GRANT_LOCK(log); 2652 s = GRANT_LOCK(log);
2582 goto redo; 2653 goto redo;
2583 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2654 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2584 XLOG_DEL_TICKETQ(log->l_write_headq, tic); 2655 xlog_del_ticketq(&log->l_write_headq, tic);
2585 2656
2586 XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */ 2657 /* we've got enough space */
2658 xlog_grant_add_space_write(log, need_bytes);
2587#ifdef DEBUG 2659#ifdef DEBUG
2588 tail_lsn = log->l_tail_lsn; 2660 tail_lsn = log->l_tail_lsn;
2589 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) { 2661 if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
@@ -2595,12 +2667,12 @@ redo:
2595 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); 2667 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit");
2596 xlog_verify_grant_head(log, 1); 2668 xlog_verify_grant_head(log, 1);
2597 GRANT_UNLOCK(log, s); 2669 GRANT_UNLOCK(log, s);
2598 return (0); 2670 return 0;
2599 2671
2600 2672
2601 error_return: 2673 error_return:
2602 if (tic->t_flags & XLOG_TIC_IN_Q) 2674 if (tic->t_flags & XLOG_TIC_IN_Q)
2603 XLOG_DEL_TICKETQ(log->l_reserve_headq, tic); 2675 xlog_del_ticketq(&log->l_reserve_headq, tic);
2604 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret"); 2676 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
2605 /* 2677 /*
2606 * If we are failing, make sure the ticket doesn't have any 2678 * If we are failing, make sure the ticket doesn't have any
@@ -2633,8 +2705,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2633 ticket->t_cnt--; 2705 ticket->t_cnt--;
2634 2706
2635 s = GRANT_LOCK(log); 2707 s = GRANT_LOCK(log);
2636 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); 2708 xlog_grant_sub_space(log, ticket->t_curr_res);
2637 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
2638 ticket->t_curr_res = ticket->t_unit_res; 2709 ticket->t_curr_res = ticket->t_unit_res;
2639 XLOG_TIC_RESET_RES(ticket); 2710 XLOG_TIC_RESET_RES(ticket);
2640 xlog_trace_loggrant(log, ticket, 2711 xlog_trace_loggrant(log, ticket,
@@ -2647,7 +2718,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2647 return; 2718 return;
2648 } 2719 }
2649 2720
2650 XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r'); 2721 xlog_grant_add_space_reserve(log, ticket->t_unit_res);
2651 xlog_trace_loggrant(log, ticket, 2722 xlog_trace_loggrant(log, ticket,
2652 "xlog_regrant_reserve_log_space: exit"); 2723 "xlog_regrant_reserve_log_space: exit");
2653 xlog_verify_grant_head(log, 0); 2724 xlog_verify_grant_head(log, 0);
@@ -2683,8 +2754,7 @@ xlog_ungrant_log_space(xlog_t *log,
2683 s = GRANT_LOCK(log); 2754 s = GRANT_LOCK(log);
2684 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); 2755 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
2685 2756
2686 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w'); 2757 xlog_grant_sub_space(log, ticket->t_curr_res);
2687 XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
2688 2758
2689 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current"); 2759 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
2690 2760
@@ -2693,8 +2763,7 @@ xlog_ungrant_log_space(xlog_t *log,
2693 */ 2763 */
2694 if (ticket->t_cnt > 0) { 2764 if (ticket->t_cnt > 0) {
2695 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV); 2765 ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
2696 XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w'); 2766 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
2697 XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
2698 } 2767 }
2699 2768
2700 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); 2769 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");
@@ -2768,7 +2837,7 @@ xlog_state_release_iclog(xlog_t *log,
2768 if (sync) { 2837 if (sync) {
2769 return xlog_sync(log, iclog); 2838 return xlog_sync(log, iclog);
2770 } 2839 }
2771 return (0); 2840 return 0;
2772 2841
2773} /* xlog_state_release_iclog */ 2842} /* xlog_state_release_iclog */
2774 2843
@@ -3058,7 +3127,7 @@ try_again:
3058 } while (iclog != log->l_iclog); 3127 } while (iclog != log->l_iclog);
3059 3128
3060 LOG_UNLOCK(log, s); 3129 LOG_UNLOCK(log, s);
3061 return (0); 3130 return 0;
3062} /* xlog_state_sync */ 3131} /* xlog_state_sync */
3063 3132
3064 3133
@@ -3476,12 +3545,12 @@ xlog_state_ioerror(
3476 ic->ic_state = XLOG_STATE_IOERROR; 3545 ic->ic_state = XLOG_STATE_IOERROR;
3477 ic = ic->ic_next; 3546 ic = ic->ic_next;
3478 } while (ic != iclog); 3547 } while (ic != iclog);
3479 return (0); 3548 return 0;
3480 } 3549 }
3481 /* 3550 /*
3482 * Return non-zero, if state transition has already happened. 3551 * Return non-zero, if state transition has already happened.
3483 */ 3552 */
3484 return (1); 3553 return 1;
3485} 3554}
3486 3555
3487/* 3556/*
@@ -3518,7 +3587,7 @@ xfs_log_force_umount(
3518 log->l_flags & XLOG_ACTIVE_RECOVERY) { 3587 log->l_flags & XLOG_ACTIVE_RECOVERY) {
3519 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3588 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3520 XFS_BUF_DONE(mp->m_sb_bp); 3589 XFS_BUF_DONE(mp->m_sb_bp);
3521 return (0); 3590 return 0;
3522 } 3591 }
3523 3592
3524 /* 3593 /*
@@ -3527,7 +3596,7 @@ xfs_log_force_umount(
3527 */ 3596 */
3528 if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) { 3597 if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) {
3529 ASSERT(XLOG_FORCED_SHUTDOWN(log)); 3598 ASSERT(XLOG_FORCED_SHUTDOWN(log));
3530 return (1); 3599 return 1;
3531 } 3600 }
3532 retval = 0; 3601 retval = 0;
3533 /* 3602 /*
@@ -3609,7 +3678,7 @@ xfs_log_force_umount(
3609 } 3678 }
3610#endif 3679#endif
3611 /* return non-zero if log IOERROR transition had already happened */ 3680 /* return non-zero if log IOERROR transition had already happened */
3612 return (retval); 3681 return retval;
3613} 3682}
3614 3683
3615STATIC int 3684STATIC int
@@ -3623,8 +3692,8 @@ xlog_iclogs_empty(xlog_t *log)
3623 * any language. 3692 * any language.
3624 */ 3693 */
3625 if (iclog->ic_header.h_num_logops) 3694 if (iclog->ic_header.h_num_logops)
3626 return(0); 3695 return 0;
3627 iclog = iclog->ic_next; 3696 iclog = iclog->ic_next;
3628 } while (iclog != log->l_iclog); 3697 } while (iclog != log->l_iclog);
3629 return(1); 3698 return 1;
3630} 3699}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 158829ca56f6..4b2ac88dbb83 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -30,13 +30,7 @@
30 * By comparing each component, we don't have to worry about extra 30 * By comparing each component, we don't have to worry about extra
31 * endian issues in treating two 32 bit numbers as one 64 bit number 31 * endian issues in treating two 32 bit numbers as one 64 bit number
32 */ 32 */
33static 33static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
34#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96))
35__attribute__((unused)) /* gcc 2.95, 2.96 miscompile this when inlined */
36#else
37__inline__
38#endif
39xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
40{ 34{
41 if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2)) 35 if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2))
42 return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999; 36 return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999;
@@ -102,7 +96,6 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
102 96
103 97
104/* Region types for iovec's i_type */ 98/* Region types for iovec's i_type */
105#if defined(XFS_LOG_RES_DEBUG)
106#define XLOG_REG_TYPE_BFORMAT 1 99#define XLOG_REG_TYPE_BFORMAT 1
107#define XLOG_REG_TYPE_BCHUNK 2 100#define XLOG_REG_TYPE_BCHUNK 2
108#define XLOG_REG_TYPE_EFI_FORMAT 3 101#define XLOG_REG_TYPE_EFI_FORMAT 3
@@ -123,21 +116,13 @@ xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
123#define XLOG_REG_TYPE_COMMIT 18 116#define XLOG_REG_TYPE_COMMIT 18
124#define XLOG_REG_TYPE_TRANSHDR 19 117#define XLOG_REG_TYPE_TRANSHDR 19
125#define XLOG_REG_TYPE_MAX 19 118#define XLOG_REG_TYPE_MAX 19
126#endif
127 119
128#if defined(XFS_LOG_RES_DEBUG)
129#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) 120#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
130#else
131#define XLOG_VEC_SET_TYPE(vecp, t)
132#endif
133
134 121
135typedef struct xfs_log_iovec { 122typedef struct xfs_log_iovec {
136 xfs_caddr_t i_addr; /* beginning address of region */ 123 xfs_caddr_t i_addr; /* beginning address of region */
137 int i_len; /* length in bytes of region */ 124 int i_len; /* length in bytes of region */
138#if defined(XFS_LOG_RES_DEBUG) 125 uint i_type; /* type of region */
139 uint i_type; /* type of region */
140#endif
141} xfs_log_iovec_t; 126} xfs_log_iovec_t;
142 127
143typedef void* xfs_log_ticket_t; 128typedef void* xfs_log_ticket_t;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4518b188ade6..34bcbf50789c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -253,7 +253,6 @@ typedef __uint32_t xlog_tid_t;
253 253
254 254
255/* Ticket reservation region accounting */ 255/* Ticket reservation region accounting */
256#if defined(XFS_LOG_RES_DEBUG)
257#define XLOG_TIC_LEN_MAX 15 256#define XLOG_TIC_LEN_MAX 15
258#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \ 257#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
259 (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0) 258 (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
@@ -278,15 +277,9 @@ typedef __uint32_t xlog_tid_t;
278 * we don't care about. 277 * we don't care about.
279 */ 278 */
280typedef struct xlog_res { 279typedef struct xlog_res {
281 uint r_len; 280 uint r_len; /* region length :4 */
282 uint r_type; 281 uint r_type; /* region's transaction type :4 */
283} xlog_res_t; 282} xlog_res_t;
284#else
285#define XLOG_TIC_RESET_RES(t)
286#define XLOG_TIC_ADD_OPHDR(t)
287#define XLOG_TIC_ADD_REGION(t, len, type)
288#endif
289
290 283
291typedef struct xlog_ticket { 284typedef struct xlog_ticket {
292 sv_t t_sema; /* sleep on this semaphore : 20 */ 285 sv_t t_sema; /* sleep on this semaphore : 20 */
@@ -301,14 +294,12 @@ typedef struct xlog_ticket {
301 char t_flags; /* properties of reservation : 1 */ 294 char t_flags; /* properties of reservation : 1 */
302 uint t_trans_type; /* transaction type : 4 */ 295 uint t_trans_type; /* transaction type : 4 */
303 296
304#if defined (XFS_LOG_RES_DEBUG)
305 /* reservation array fields */ 297 /* reservation array fields */
306 uint t_res_num; /* num in array : 4 */ 298 uint t_res_num; /* num in array : 4 */
307 xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : X */
308 uint t_res_num_ophdrs; /* num op hdrs : 4 */ 299 uint t_res_num_ophdrs; /* num op hdrs : 4 */
309 uint t_res_arr_sum; /* array sum : 4 */ 300 uint t_res_arr_sum; /* array sum : 4 */
310 uint t_res_o_flow; /* sum overflow : 4 */ 301 uint t_res_o_flow; /* sum overflow : 4 */
311#endif 302 xlog_res_t t_res_arr[XLOG_TIC_LEN_MAX]; /* array of res : 8 * 15 */
312} xlog_ticket_t; 303} xlog_ticket_t;
313 304
314#endif 305#endif
@@ -494,71 +485,13 @@ typedef struct log {
494 485
495#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) 486#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
496 487
497#define XLOG_GRANT_SUB_SPACE(log,bytes,type) \
498 { \
499 if (type == 'w') { \
500 (log)->l_grant_write_bytes -= (bytes); \
501 if ((log)->l_grant_write_bytes < 0) { \
502 (log)->l_grant_write_bytes += (log)->l_logsize; \
503 (log)->l_grant_write_cycle--; \
504 } \
505 } else { \
506 (log)->l_grant_reserve_bytes -= (bytes); \
507 if ((log)->l_grant_reserve_bytes < 0) { \
508 (log)->l_grant_reserve_bytes += (log)->l_logsize;\
509 (log)->l_grant_reserve_cycle--; \
510 } \
511 } \
512 }
513#define XLOG_GRANT_ADD_SPACE(log,bytes,type) \
514 { \
515 if (type == 'w') { \
516 (log)->l_grant_write_bytes += (bytes); \
517 if ((log)->l_grant_write_bytes > (log)->l_logsize) { \
518 (log)->l_grant_write_bytes -= (log)->l_logsize; \
519 (log)->l_grant_write_cycle++; \
520 } \
521 } else { \
522 (log)->l_grant_reserve_bytes += (bytes); \
523 if ((log)->l_grant_reserve_bytes > (log)->l_logsize) { \
524 (log)->l_grant_reserve_bytes -= (log)->l_logsize;\
525 (log)->l_grant_reserve_cycle++; \
526 } \
527 } \
528 }
529#define XLOG_INS_TICKETQ(q, tic) \
530 { \
531 if (q) { \
532 (tic)->t_next = (q); \
533 (tic)->t_prev = (q)->t_prev; \
534 (q)->t_prev->t_next = (tic); \
535 (q)->t_prev = (tic); \
536 } else { \
537 (tic)->t_prev = (tic)->t_next = (tic); \
538 (q) = (tic); \
539 } \
540 (tic)->t_flags |= XLOG_TIC_IN_Q; \
541 }
542#define XLOG_DEL_TICKETQ(q, tic) \
543 { \
544 if ((tic) == (tic)->t_next) { \
545 (q) = NULL; \
546 } else { \
547 (q) = (tic)->t_next; \
548 (tic)->t_next->t_prev = (tic)->t_prev; \
549 (tic)->t_prev->t_next = (tic)->t_next; \
550 } \
551 (tic)->t_next = (tic)->t_prev = NULL; \
552 (tic)->t_flags &= ~XLOG_TIC_IN_Q; \
553 }
554 488
555/* common routines */ 489/* common routines */
556extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 490extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
557extern int xlog_find_tail(xlog_t *log, 491extern int xlog_find_tail(xlog_t *log,
558 xfs_daddr_t *head_blk, 492 xfs_daddr_t *head_blk,
559 xfs_daddr_t *tail_blk, 493 xfs_daddr_t *tail_blk);
560 int readonly); 494extern int xlog_recover(xlog_t *log);
561extern int xlog_recover(xlog_t *log, int readonly);
562extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); 495extern int xlog_recover_finish(xlog_t *log, int mfsi_flags);
563extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 496extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
564extern void xlog_recover_process_iunlinks(xlog_t *log); 497extern void xlog_recover_process_iunlinks(xlog_t *log);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ab7df768063..7d46cbd6a07a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -783,8 +783,7 @@ int
783xlog_find_tail( 783xlog_find_tail(
784 xlog_t *log, 784 xlog_t *log,
785 xfs_daddr_t *head_blk, 785 xfs_daddr_t *head_blk,
786 xfs_daddr_t *tail_blk, 786 xfs_daddr_t *tail_blk)
787 int readonly)
788{ 787{
789 xlog_rec_header_t *rhead; 788 xlog_rec_header_t *rhead;
790 xlog_op_header_t *op_head; 789 xlog_op_header_t *op_head;
@@ -2563,10 +2562,12 @@ xlog_recover_do_quotaoff_trans(
2563 2562
2564 /* 2563 /*
2565 * The logitem format's flag tells us if this was user quotaoff, 2564 * The logitem format's flag tells us if this was user quotaoff,
2566 * group quotaoff or both. 2565 * group/project quotaoff or both.
2567 */ 2566 */
2568 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 2567 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
2569 log->l_quotaoffs_flag |= XFS_DQ_USER; 2568 log->l_quotaoffs_flag |= XFS_DQ_USER;
2569 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
2570 log->l_quotaoffs_flag |= XFS_DQ_PROJ;
2570 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 2571 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
2571 log->l_quotaoffs_flag |= XFS_DQ_GROUP; 2572 log->l_quotaoffs_flag |= XFS_DQ_GROUP;
2572 2573
@@ -3890,14 +3891,13 @@ xlog_do_recover(
3890 */ 3891 */
3891int 3892int
3892xlog_recover( 3893xlog_recover(
3893 xlog_t *log, 3894 xlog_t *log)
3894 int readonly)
3895{ 3895{
3896 xfs_daddr_t head_blk, tail_blk; 3896 xfs_daddr_t head_blk, tail_blk;
3897 int error; 3897 int error;
3898 3898
3899 /* find the tail of the log */ 3899 /* find the tail of the log */
3900 if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly))) 3900 if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
3901 return error; 3901 return error;
3902 3902
3903 if (tail_blk != head_blk) { 3903 if (tail_blk != head_blk) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 541d5dd474be..62188ea392c7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,7 +51,7 @@ STATIC int xfs_uuid_mount(xfs_mount_t *);
51STATIC void xfs_uuid_unmount(xfs_mount_t *mp); 51STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
52STATIC void xfs_unmountfs_wait(xfs_mount_t *); 52STATIC void xfs_unmountfs_wait(xfs_mount_t *);
53 53
54static struct { 54static const struct {
55 short offset; 55 short offset;
56 short type; /* 0 = integer 56 short type; /* 0 = integer
57 * 1 = binary / string (no translation) 57 * 1 = binary / string (no translation)
@@ -117,7 +117,7 @@ xfs_mount_init(void)
117 117
118 AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); 118 AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
119 spinlock_init(&mp->m_sb_lock, "xfs_sb"); 119 spinlock_init(&mp->m_sb_lock, "xfs_sb");
120 mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock"); 120 mutex_init(&mp->m_ilock);
121 initnsema(&mp->m_growlock, 1, "xfs_grow"); 121 initnsema(&mp->m_growlock, 1, "xfs_grow");
122 /* 122 /*
123 * Initialize the AIL. 123 * Initialize the AIL.
@@ -646,7 +646,7 @@ xfs_mountfs(
646 646
647 if (mp->m_sb_bp == NULL) { 647 if (mp->m_sb_bp == NULL) {
648 if ((error = xfs_readsb(mp))) { 648 if ((error = xfs_readsb(mp))) {
649 return (error); 649 return error;
650 } 650 }
651 } 651 }
652 xfs_mount_common(mp, sbp); 652 xfs_mount_common(mp, sbp);
@@ -889,7 +889,7 @@ xfs_mountfs(
889 * For client case we are done now 889 * For client case we are done now
890 */ 890 */
891 if (mfsi_flags & XFS_MFSI_CLIENT) { 891 if (mfsi_flags & XFS_MFSI_CLIENT) {
892 return(0); 892 return 0;
893 } 893 }
894 894
895 /* 895 /*
@@ -1077,8 +1077,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1077 1077
1078 xfs_iflush_all(mp); 1078 xfs_iflush_all(mp);
1079 1079
1080 XFS_QM_DQPURGEALL(mp, 1080 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1081 XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
1082 1081
1083 /* 1082 /*
1084 * Flush out the log synchronously so that we know for sure 1083 * Flush out the log synchronously so that we know for sure
@@ -1183,7 +1182,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1183 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); 1182 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
1184 } 1183 }
1185 xfs_buf_relse(sbp); 1184 xfs_buf_relse(sbp);
1186 return (error); 1185 return error;
1187} 1186}
1188 1187
1189/* 1188/*
@@ -1258,19 +1257,19 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1258 lcounter += delta; 1257 lcounter += delta;
1259 if (lcounter < 0) { 1258 if (lcounter < 0) {
1260 ASSERT(0); 1259 ASSERT(0);
1261 return (XFS_ERROR(EINVAL)); 1260 return XFS_ERROR(EINVAL);
1262 } 1261 }
1263 mp->m_sb.sb_icount = lcounter; 1262 mp->m_sb.sb_icount = lcounter;
1264 return (0); 1263 return 0;
1265 case XFS_SBS_IFREE: 1264 case XFS_SBS_IFREE:
1266 lcounter = (long long)mp->m_sb.sb_ifree; 1265 lcounter = (long long)mp->m_sb.sb_ifree;
1267 lcounter += delta; 1266 lcounter += delta;
1268 if (lcounter < 0) { 1267 if (lcounter < 0) {
1269 ASSERT(0); 1268 ASSERT(0);
1270 return (XFS_ERROR(EINVAL)); 1269 return XFS_ERROR(EINVAL);
1271 } 1270 }
1272 mp->m_sb.sb_ifree = lcounter; 1271 mp->m_sb.sb_ifree = lcounter;
1273 return (0); 1272 return 0;
1274 case XFS_SBS_FDBLOCKS: 1273 case XFS_SBS_FDBLOCKS:
1275 1274
1276 lcounter = (long long)mp->m_sb.sb_fdblocks; 1275 lcounter = (long long)mp->m_sb.sb_fdblocks;
@@ -1297,101 +1296,101 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1297 if (rsvd) { 1296 if (rsvd) {
1298 lcounter = (long long)mp->m_resblks_avail + delta; 1297 lcounter = (long long)mp->m_resblks_avail + delta;
1299 if (lcounter < 0) { 1298 if (lcounter < 0) {
1300 return (XFS_ERROR(ENOSPC)); 1299 return XFS_ERROR(ENOSPC);
1301 } 1300 }
1302 mp->m_resblks_avail = lcounter; 1301 mp->m_resblks_avail = lcounter;
1303 return (0); 1302 return 0;
1304 } else { /* not reserved */ 1303 } else { /* not reserved */
1305 return (XFS_ERROR(ENOSPC)); 1304 return XFS_ERROR(ENOSPC);
1306 } 1305 }
1307 } 1306 }
1308 } 1307 }
1309 1308
1310 mp->m_sb.sb_fdblocks = lcounter; 1309 mp->m_sb.sb_fdblocks = lcounter;
1311 return (0); 1310 return 0;
1312 case XFS_SBS_FREXTENTS: 1311 case XFS_SBS_FREXTENTS:
1313 lcounter = (long long)mp->m_sb.sb_frextents; 1312 lcounter = (long long)mp->m_sb.sb_frextents;
1314 lcounter += delta; 1313 lcounter += delta;
1315 if (lcounter < 0) { 1314 if (lcounter < 0) {
1316 return (XFS_ERROR(ENOSPC)); 1315 return XFS_ERROR(ENOSPC);
1317 } 1316 }
1318 mp->m_sb.sb_frextents = lcounter; 1317 mp->m_sb.sb_frextents = lcounter;
1319 return (0); 1318 return 0;
1320 case XFS_SBS_DBLOCKS: 1319 case XFS_SBS_DBLOCKS:
1321 lcounter = (long long)mp->m_sb.sb_dblocks; 1320 lcounter = (long long)mp->m_sb.sb_dblocks;
1322 lcounter += delta; 1321 lcounter += delta;
1323 if (lcounter < 0) { 1322 if (lcounter < 0) {
1324 ASSERT(0); 1323 ASSERT(0);
1325 return (XFS_ERROR(EINVAL)); 1324 return XFS_ERROR(EINVAL);
1326 } 1325 }
1327 mp->m_sb.sb_dblocks = lcounter; 1326 mp->m_sb.sb_dblocks = lcounter;
1328 return (0); 1327 return 0;
1329 case XFS_SBS_AGCOUNT: 1328 case XFS_SBS_AGCOUNT:
1330 scounter = mp->m_sb.sb_agcount; 1329 scounter = mp->m_sb.sb_agcount;
1331 scounter += delta; 1330 scounter += delta;
1332 if (scounter < 0) { 1331 if (scounter < 0) {
1333 ASSERT(0); 1332 ASSERT(0);
1334 return (XFS_ERROR(EINVAL)); 1333 return XFS_ERROR(EINVAL);
1335 } 1334 }
1336 mp->m_sb.sb_agcount = scounter; 1335 mp->m_sb.sb_agcount = scounter;
1337 return (0); 1336 return 0;
1338 case XFS_SBS_IMAX_PCT: 1337 case XFS_SBS_IMAX_PCT:
1339 scounter = mp->m_sb.sb_imax_pct; 1338 scounter = mp->m_sb.sb_imax_pct;
1340 scounter += delta; 1339 scounter += delta;
1341 if (scounter < 0) { 1340 if (scounter < 0) {
1342 ASSERT(0); 1341 ASSERT(0);
1343 return (XFS_ERROR(EINVAL)); 1342 return XFS_ERROR(EINVAL);
1344 } 1343 }
1345 mp->m_sb.sb_imax_pct = scounter; 1344 mp->m_sb.sb_imax_pct = scounter;
1346 return (0); 1345 return 0;
1347 case XFS_SBS_REXTSIZE: 1346 case XFS_SBS_REXTSIZE:
1348 scounter = mp->m_sb.sb_rextsize; 1347 scounter = mp->m_sb.sb_rextsize;
1349 scounter += delta; 1348 scounter += delta;
1350 if (scounter < 0) { 1349 if (scounter < 0) {
1351 ASSERT(0); 1350 ASSERT(0);
1352 return (XFS_ERROR(EINVAL)); 1351 return XFS_ERROR(EINVAL);
1353 } 1352 }
1354 mp->m_sb.sb_rextsize = scounter; 1353 mp->m_sb.sb_rextsize = scounter;
1355 return (0); 1354 return 0;
1356 case XFS_SBS_RBMBLOCKS: 1355 case XFS_SBS_RBMBLOCKS:
1357 scounter = mp->m_sb.sb_rbmblocks; 1356 scounter = mp->m_sb.sb_rbmblocks;
1358 scounter += delta; 1357 scounter += delta;
1359 if (scounter < 0) { 1358 if (scounter < 0) {
1360 ASSERT(0); 1359 ASSERT(0);
1361 return (XFS_ERROR(EINVAL)); 1360 return XFS_ERROR(EINVAL);
1362 } 1361 }
1363 mp->m_sb.sb_rbmblocks = scounter; 1362 mp->m_sb.sb_rbmblocks = scounter;
1364 return (0); 1363 return 0;
1365 case XFS_SBS_RBLOCKS: 1364 case XFS_SBS_RBLOCKS:
1366 lcounter = (long long)mp->m_sb.sb_rblocks; 1365 lcounter = (long long)mp->m_sb.sb_rblocks;
1367 lcounter += delta; 1366 lcounter += delta;
1368 if (lcounter < 0) { 1367 if (lcounter < 0) {
1369 ASSERT(0); 1368 ASSERT(0);
1370 return (XFS_ERROR(EINVAL)); 1369 return XFS_ERROR(EINVAL);
1371 } 1370 }
1372 mp->m_sb.sb_rblocks = lcounter; 1371 mp->m_sb.sb_rblocks = lcounter;
1373 return (0); 1372 return 0;
1374 case XFS_SBS_REXTENTS: 1373 case XFS_SBS_REXTENTS:
1375 lcounter = (long long)mp->m_sb.sb_rextents; 1374 lcounter = (long long)mp->m_sb.sb_rextents;
1376 lcounter += delta; 1375 lcounter += delta;
1377 if (lcounter < 0) { 1376 if (lcounter < 0) {
1378 ASSERT(0); 1377 ASSERT(0);
1379 return (XFS_ERROR(EINVAL)); 1378 return XFS_ERROR(EINVAL);
1380 } 1379 }
1381 mp->m_sb.sb_rextents = lcounter; 1380 mp->m_sb.sb_rextents = lcounter;
1382 return (0); 1381 return 0;
1383 case XFS_SBS_REXTSLOG: 1382 case XFS_SBS_REXTSLOG:
1384 scounter = mp->m_sb.sb_rextslog; 1383 scounter = mp->m_sb.sb_rextslog;
1385 scounter += delta; 1384 scounter += delta;
1386 if (scounter < 0) { 1385 if (scounter < 0) {
1387 ASSERT(0); 1386 ASSERT(0);
1388 return (XFS_ERROR(EINVAL)); 1387 return XFS_ERROR(EINVAL);
1389 } 1388 }
1390 mp->m_sb.sb_rextslog = scounter; 1389 mp->m_sb.sb_rextslog = scounter;
1391 return (0); 1390 return 0;
1392 default: 1391 default:
1393 ASSERT(0); 1392 ASSERT(0);
1394 return (XFS_ERROR(EINVAL)); 1393 return XFS_ERROR(EINVAL);
1395 } 1394 }
1396} 1395}
1397 1396
@@ -1410,7 +1409,7 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
1410 s = XFS_SB_LOCK(mp); 1409 s = XFS_SB_LOCK(mp);
1411 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1410 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1412 XFS_SB_UNLOCK(mp, s); 1411 XFS_SB_UNLOCK(mp, s);
1413 return (status); 1412 return status;
1414} 1413}
1415 1414
1416/* 1415/*
@@ -1471,7 +1470,7 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
1471 } 1470 }
1472 } 1471 }
1473 XFS_SB_UNLOCK(mp, s); 1472 XFS_SB_UNLOCK(mp, s);
1474 return (status); 1473 return status;
1475} 1474}
1476 1475
1477/* 1476/*
@@ -1501,7 +1500,7 @@ xfs_getsb(
1501 } 1500 }
1502 XFS_BUF_HOLD(bp); 1501 XFS_BUF_HOLD(bp);
1503 ASSERT(XFS_BUF_ISDONE(bp)); 1502 ASSERT(XFS_BUF_ISDONE(bp));
1504 return (bp); 1503 return bp;
1505} 1504}
1506 1505
1507/* 1506/*
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 08b2e0a5d807..cd3cf9613a00 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -308,7 +308,6 @@ typedef struct xfs_mount {
308 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ 308 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
309 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ 309 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
310 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ 310 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
311#define m_dev m_ddev_targp->pbr_dev
312 __uint8_t m_dircook_elog; /* log d-cookie entry bits */ 311 __uint8_t m_dircook_elog; /* log d-cookie entry bits */
313 __uint8_t m_blkbit_log; /* blocklog + NBBY */ 312 __uint8_t m_blkbit_log; /* blocklog + NBBY */
314 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 313 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
@@ -393,7 +392,7 @@ typedef struct xfs_mount {
393 user */ 392 user */
394#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment 393#define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment
395 allocations */ 394 allocations */
396#define XFS_MOUNT_COMPAT_ATTR (1ULL << 8) /* do not use attr2 format */ 395#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */
397 /* (1ULL << 9) -- currently unused */ 396 /* (1ULL << 9) -- currently unused */
398#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ 397#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
399#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */ 398#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */
@@ -533,7 +532,7 @@ typedef struct xfs_mod_sb {
533 int msb_delta; /* Change to make to specified field */ 532 int msb_delta; /* Change to make to specified field */
534} xfs_mod_sb_t; 533} xfs_mod_sb_t;
535 534
536#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock), PINOD) 535#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
537#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) 536#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
538#define XFS_SB_LOCK(mp) mutex_spinlock(&(mp)->m_sb_lock) 537#define XFS_SB_LOCK(mp) mutex_spinlock(&(mp)->m_sb_lock)
539#define XFS_SB_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_sb_lock,(s)) 538#define XFS_SB_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_sb_lock,(s))
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 4d4e8f4e768e..81a05cfd77d2 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -243,7 +243,6 @@ xfs_rename(
243 xfs_inode_t *inodes[4]; 243 xfs_inode_t *inodes[4];
244 int target_ip_dropped = 0; /* dropped target_ip link? */ 244 int target_ip_dropped = 0; /* dropped target_ip link? */
245 vnode_t *src_dir_vp; 245 vnode_t *src_dir_vp;
246 bhv_desc_t *target_dir_bdp;
247 int spaceres; 246 int spaceres;
248 int target_link_zero = 0; 247 int target_link_zero = 0;
249 int num_inodes; 248 int num_inodes;
@@ -260,14 +259,12 @@ xfs_rename(
260 * Find the XFS behavior descriptor for the target directory 259 * Find the XFS behavior descriptor for the target directory
261 * vnode since it was not handed to us. 260 * vnode since it was not handed to us.
262 */ 261 */
263 target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp), 262 target_dp = xfs_vtoi(target_dir_vp);
264 &xfs_vnodeops); 263 if (target_dp == NULL) {
265 if (target_dir_bdp == NULL) {
266 return XFS_ERROR(EXDEV); 264 return XFS_ERROR(EXDEV);
267 } 265 }
268 266
269 src_dp = XFS_BHVTOI(src_dir_bdp); 267 src_dp = XFS_BHVTOI(src_dir_bdp);
270 target_dp = XFS_BHVTOI(target_dir_bdp);
271 mp = src_dp->i_mount; 268 mp = src_dp->i_mount;
272 269
273 if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) || 270 if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index c4b20872f07d..a59c102cf214 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -238,6 +238,7 @@ xfs_bioerror_relse(
238 } 238 }
239 return (EIO); 239 return (EIO);
240} 240}
241
241/* 242/*
242 * Prints out an ALERT message about I/O error. 243 * Prints out an ALERT message about I/O error.
243 */ 244 */
@@ -252,11 +253,9 @@ xfs_ioerror_alert(
252 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" 253 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
253 " (\"%s\") error %d buf count %zd", 254 " (\"%s\") error %d buf count %zd",
254 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname, 255 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
255 XFS_BUFTARG_NAME(bp->pb_target), 256 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
256 (__uint64_t)blkno, 257 (__uint64_t)blkno, func,
257 func, 258 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
258 XFS_BUF_GETERROR(bp),
259 XFS_BUF_COUNT(bp));
260} 259}
261 260
262/* 261/*
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 4a17d335f897..bf168a91ddb8 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -68,18 +68,6 @@ struct xfs_mount;
68 (XFS_SB_VERSION_NUMBITS | \ 68 (XFS_SB_VERSION_NUMBITS | \
69 XFS_SB_VERSION_OKREALFBITS | \ 69 XFS_SB_VERSION_OKREALFBITS | \
70 XFS_SB_VERSION_OKSASHFBITS) 70 XFS_SB_VERSION_OKSASHFBITS)
71#define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2,na,sflag,morebits) \
72 (((ia) || (dia) || (extflag) || (dirv2) || (na) || (sflag) || \
73 (morebits)) ? \
74 (XFS_SB_VERSION_4 | \
75 ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \
76 ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
77 ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
78 ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0) | \
79 ((na) ? XFS_SB_VERSION_LOGV2BIT : 0) | \
80 ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \
81 ((morebits) ? XFS_SB_VERSION_MOREBITSBIT : 0)) : \
82 XFS_SB_VERSION_1)
83 71
84/* 72/*
85 * There are two words to hold XFS "feature" bits: the original 73 * There are two words to hold XFS "feature" bits: the original
@@ -105,11 +93,6 @@ struct xfs_mount;
105 (XFS_SB_VERSION2_OKREALFBITS | \ 93 (XFS_SB_VERSION2_OKREALFBITS | \
106 XFS_SB_VERSION2_OKSASHFBITS ) 94 XFS_SB_VERSION2_OKSASHFBITS )
107 95
108/*
109 * mkfs macro to set up sb_features2 word
110 */
111#define XFS_SB_VERSION2_MKFS(resvd1, sbcntr) 0
112
113typedef struct xfs_sb 96typedef struct xfs_sb
114{ 97{
115 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ 98 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 279e043d7323..d3d714e6b32a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1014,6 +1014,7 @@ xfs_trans_cancel(
1014 xfs_log_item_t *lip; 1014 xfs_log_item_t *lip;
1015 int i; 1015 int i;
1016#endif 1016#endif
1017 xfs_mount_t *mp = tp->t_mountp;
1017 1018
1018 /* 1019 /*
1019 * See if the caller is being too lazy to figure out if 1020 * See if the caller is being too lazy to figure out if
@@ -1026,9 +1027,10 @@ xfs_trans_cancel(
1026 * filesystem. This happens in paths where we detect 1027 * filesystem. This happens in paths where we detect
1027 * corruption and decide to give up. 1028 * corruption and decide to give up.
1028 */ 1029 */
1029 if ((tp->t_flags & XFS_TRANS_DIRTY) && 1030 if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
1030 !XFS_FORCED_SHUTDOWN(tp->t_mountp)) 1031 XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
1031 xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE); 1032 xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
1033 }
1032#ifdef DEBUG 1034#ifdef DEBUG
1033 if (!(flags & XFS_TRANS_ABORT)) { 1035 if (!(flags & XFS_TRANS_ABORT)) {
1034 licp = &(tp->t_items); 1036 licp = &(tp->t_items);
@@ -1040,7 +1042,7 @@ xfs_trans_cancel(
1040 } 1042 }
1041 1043
1042 lip = lidp->lid_item; 1044 lip = lidp->lid_item;
1043 if (!XFS_FORCED_SHUTDOWN(tp->t_mountp)) 1045 if (!XFS_FORCED_SHUTDOWN(mp))
1044 ASSERT(!(lip->li_type == XFS_LI_EFD)); 1046 ASSERT(!(lip->li_type == XFS_LI_EFD));
1045 } 1047 }
1046 licp = licp->lic_next; 1048 licp = licp->lic_next;
@@ -1048,7 +1050,7 @@ xfs_trans_cancel(
1048 } 1050 }
1049#endif 1051#endif
1050 xfs_trans_unreserve_and_mod_sb(tp); 1052 xfs_trans_unreserve_and_mod_sb(tp);
1051 XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp); 1053 XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
1052 1054
1053 if (tp->t_ticket) { 1055 if (tp->t_ticket) {
1054 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 1056 if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1057,7 +1059,7 @@ xfs_trans_cancel(
1057 } else { 1059 } else {
1058 log_flags = 0; 1060 log_flags = 0;
1059 } 1061 }
1060 xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags); 1062 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
1061 } 1063 }
1062 1064
1063 /* mark this thread as no longer being in a transaction */ 1065 /* mark this thread as no longer being in a transaction */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a889963fdd14..d77901c07f63 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -973,7 +973,6 @@ void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
973void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); 973void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
974void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); 974void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
975void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); 975void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
976void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
977void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); 976void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
978void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); 977void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
979void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 978void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 486147ef0e3d..1117d600d741 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -78,7 +78,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
78 lidp->lid_size = 0; 78 lidp->lid_size = 0;
79 lip->li_desc = lidp; 79 lip->li_desc = lidp;
80 lip->li_mountp = tp->t_mountp; 80 lip->li_mountp = tp->t_mountp;
81 return (lidp); 81 return lidp;
82 } 82 }
83 83
84 /* 84 /*
@@ -119,7 +119,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
119 lidp->lid_size = 0; 119 lidp->lid_size = 0;
120 lip->li_desc = lidp; 120 lip->li_desc = lidp;
121 lip->li_mountp = tp->t_mountp; 121 lip->li_mountp = tp->t_mountp;
122 return (lidp); 122 return lidp;
123} 123}
124 124
125/* 125/*
@@ -180,7 +180,7 @@ xfs_trans_find_item(xfs_trans_t *tp, xfs_log_item_t *lip)
180{ 180{
181 ASSERT(lip->li_desc != NULL); 181 ASSERT(lip->li_desc != NULL);
182 182
183 return (lip->li_desc); 183 return lip->li_desc;
184} 184}
185 185
186 186
@@ -219,10 +219,10 @@ xfs_trans_first_item(xfs_trans_t *tp)
219 continue; 219 continue;
220 } 220 }
221 221
222 return (XFS_LIC_SLOT(licp, i)); 222 return XFS_LIC_SLOT(licp, i);
223 } 223 }
224 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); 224 cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item");
225 return(NULL); 225 return NULL;
226} 226}
227 227
228 228
@@ -252,7 +252,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
252 continue; 252 continue;
253 } 253 }
254 254
255 return (XFS_LIC_SLOT(licp, i)); 255 return XFS_LIC_SLOT(licp, i);
256 } 256 }
257 257
258 /* 258 /*
@@ -261,7 +261,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
261 * If there is no next chunk, return NULL. 261 * If there is no next chunk, return NULL.
262 */ 262 */
263 if (licp->lic_next == NULL) { 263 if (licp->lic_next == NULL) {
264 return (NULL); 264 return NULL;
265 } 265 }
266 266
267 licp = licp->lic_next; 267 licp = licp->lic_next;
@@ -271,7 +271,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
271 continue; 271 continue;
272 } 272 }
273 273
274 return (XFS_LIC_SLOT(licp, i)); 274 return XFS_LIC_SLOT(licp, i);
275 } 275 }
276 ASSERT(0); 276 ASSERT(0);
277 /* NOTREACHED */ 277 /* NOTREACHED */
@@ -425,7 +425,7 @@ xfs_trans_unlock_chunk(
425 } 425 }
426 } 426 }
427 427
428 return (freed); 428 return freed;
429} 429}
430 430
431 431
@@ -478,7 +478,7 @@ xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
478 */ 478 */
479 lbsp->lbc_ag = ag; 479 lbsp->lbc_ag = ag;
480 lbsp->lbc_idx = idx; 480 lbsp->lbc_idx = idx;
481 return (lbsp); 481 return lbsp;
482 } 482 }
483 483
484 /* 484 /*
@@ -512,7 +512,7 @@ xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
512 tp->t_busy_free--; 512 tp->t_busy_free--;
513 lbsp->lbc_ag = ag; 513 lbsp->lbc_ag = ag;
514 lbsp->lbc_idx = idx; 514 lbsp->lbc_idx = idx;
515 return (lbsp); 515 return lbsp;
516} 516}
517 517
518 518
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index fefe1d60377f..34654ec6ae10 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -55,16 +55,13 @@ xfs_get_dir_entry(
55 xfs_inode_t **ipp) 55 xfs_inode_t **ipp)
56{ 56{
57 vnode_t *vp; 57 vnode_t *vp;
58 bhv_desc_t *bdp;
59 58
60 vp = VNAME_TO_VNODE(dentry); 59 vp = VNAME_TO_VNODE(dentry);
61 bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); 60
62 if (!bdp) { 61 *ipp = xfs_vtoi(vp);
63 *ipp = NULL; 62 if (!*ipp)
64 return XFS_ERROR(ENOENT); 63 return XFS_ERROR(ENOENT);
65 }
66 VN_HOLD(vp); 64 VN_HOLD(vp);
67 *ipp = XFS_BHVTOI(bdp);
68 return 0; 65 return 0;
69} 66}
70 67
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 7bdbd991ab1c..b6ad370fab3d 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -53,6 +53,7 @@
53#include "xfs_acl.h" 53#include "xfs_acl.h"
54#include "xfs_attr.h" 54#include "xfs_attr.h"
55#include "xfs_clnt.h" 55#include "xfs_clnt.h"
56#include "xfs_fsops.h"
56 57
57STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); 58STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
58 59
@@ -290,8 +291,8 @@ xfs_start_flags(
290 mp->m_flags |= XFS_MOUNT_IDELETE; 291 mp->m_flags |= XFS_MOUNT_IDELETE;
291 if (ap->flags & XFSMNT_DIRSYNC) 292 if (ap->flags & XFSMNT_DIRSYNC)
292 mp->m_flags |= XFS_MOUNT_DIRSYNC; 293 mp->m_flags |= XFS_MOUNT_DIRSYNC;
293 if (ap->flags & XFSMNT_COMPAT_ATTR) 294 if (ap->flags & XFSMNT_ATTR2)
294 mp->m_flags |= XFS_MOUNT_COMPAT_ATTR; 295 mp->m_flags |= XFS_MOUNT_ATTR2;
295 296
296 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) 297 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
297 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 298 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
@@ -312,6 +313,8 @@ xfs_start_flags(
312 mp->m_flags |= XFS_MOUNT_NOUUID; 313 mp->m_flags |= XFS_MOUNT_NOUUID;
313 if (ap->flags & XFSMNT_BARRIER) 314 if (ap->flags & XFSMNT_BARRIER)
314 mp->m_flags |= XFS_MOUNT_BARRIER; 315 mp->m_flags |= XFS_MOUNT_BARRIER;
316 else
317 mp->m_flags &= ~XFS_MOUNT_BARRIER;
315 318
316 return 0; 319 return 0;
317} 320}
@@ -330,10 +333,11 @@ xfs_finish_flags(
330 333
331 /* Fail a mount where the logbuf is smaller then the log stripe */ 334 /* Fail a mount where the logbuf is smaller then the log stripe */
332 if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { 335 if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
333 if ((ap->logbufsize == -1) && 336 if ((ap->logbufsize <= 0) &&
334 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { 337 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
335 mp->m_logbsize = mp->m_sb.sb_logsunit; 338 mp->m_logbsize = mp->m_sb.sb_logsunit;
336 } else if (ap->logbufsize < mp->m_sb.sb_logsunit) { 339 } else if (ap->logbufsize > 0 &&
340 ap->logbufsize < mp->m_sb.sb_logsunit) {
337 cmn_err(CE_WARN, 341 cmn_err(CE_WARN,
338 "XFS: logbuf size must be greater than or equal to log stripe size"); 342 "XFS: logbuf size must be greater than or equal to log stripe size");
339 return XFS_ERROR(EINVAL); 343 return XFS_ERROR(EINVAL);
@@ -347,6 +351,10 @@ xfs_finish_flags(
347 } 351 }
348 } 352 }
349 353
354 if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
355 mp->m_flags |= XFS_MOUNT_ATTR2;
356 }
357
350 /* 358 /*
351 * prohibit r/w mounts of read-only filesystems 359 * prohibit r/w mounts of read-only filesystems
352 */ 360 */
@@ -382,10 +390,6 @@ xfs_finish_flags(
382 return XFS_ERROR(EINVAL); 390 return XFS_ERROR(EINVAL);
383 } 391 }
384 392
385 if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
386 mp->m_flags &= ~XFS_MOUNT_COMPAT_ATTR;
387 }
388
389 return 0; 393 return 0;
390} 394}
391 395
@@ -504,13 +508,13 @@ xfs_mount(
504 if (error) 508 if (error)
505 goto error2; 509 goto error2;
506 510
511 if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
512 xfs_mountfs_check_barriers(mp);
513
507 error = XFS_IOINIT(vfsp, args, flags); 514 error = XFS_IOINIT(vfsp, args, flags);
508 if (error) 515 if (error)
509 goto error2; 516 goto error2;
510 517
511 if ((args->flags & XFSMNT_BARRIER) &&
512 !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
513 xfs_mountfs_check_barriers(mp);
514 return 0; 518 return 0;
515 519
516error2: 520error2:
@@ -655,6 +659,11 @@ xfs_mntupdate(
655 else 659 else
656 mp->m_flags &= ~XFS_MOUNT_NOATIME; 660 mp->m_flags &= ~XFS_MOUNT_NOATIME;
657 661
662 if (args->flags & XFSMNT_BARRIER)
663 mp->m_flags |= XFS_MOUNT_BARRIER;
664 else
665 mp->m_flags &= ~XFS_MOUNT_BARRIER;
666
658 if ((vfsp->vfs_flag & VFS_RDONLY) && 667 if ((vfsp->vfs_flag & VFS_RDONLY) &&
659 !(*flags & MS_RDONLY)) { 668 !(*flags & MS_RDONLY)) {
660 vfsp->vfs_flag &= ~VFS_RDONLY; 669 vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -1634,6 +1643,7 @@ xfs_vget(
1634#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ 1643#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
1635#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and 1644#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
1636 * unwritten extent conversion */ 1645 * unwritten extent conversion */
1646#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
1637#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ 1647#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
1638#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 1648#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
1639#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 1649#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
@@ -1680,7 +1690,6 @@ xfs_parseargs(
1680 int iosize; 1690 int iosize;
1681 1691
1682 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 1692 args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1683 args->flags |= XFSMNT_COMPAT_ATTR;
1684 1693
1685#if 0 /* XXX: off by default, until some remaining issues ironed out */ 1694#if 0 /* XXX: off by default, until some remaining issues ironed out */
1686 args->flags |= XFSMNT_IDELETE; /* default to on */ 1695 args->flags |= XFSMNT_IDELETE; /* default to on */
@@ -1806,6 +1815,8 @@ xfs_parseargs(
1806 args->flags |= XFSMNT_NOUUID; 1815 args->flags |= XFSMNT_NOUUID;
1807 } else if (!strcmp(this_char, MNTOPT_BARRIER)) { 1816 } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
1808 args->flags |= XFSMNT_BARRIER; 1817 args->flags |= XFSMNT_BARRIER;
1818 } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
1819 args->flags &= ~XFSMNT_BARRIER;
1809 } else if (!strcmp(this_char, MNTOPT_IKEEP)) { 1820 } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
1810 args->flags &= ~XFSMNT_IDELETE; 1821 args->flags &= ~XFSMNT_IDELETE;
1811 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { 1822 } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
@@ -1815,9 +1826,9 @@ xfs_parseargs(
1815 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { 1826 } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
1816 args->flags2 |= XFSMNT2_COMPAT_IOSIZE; 1827 args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1817 } else if (!strcmp(this_char, MNTOPT_ATTR2)) { 1828 } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
1818 args->flags &= ~XFSMNT_COMPAT_ATTR; 1829 args->flags |= XFSMNT_ATTR2;
1819 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 1830 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
1820 args->flags |= XFSMNT_COMPAT_ATTR; 1831 args->flags &= ~XFSMNT_ATTR2;
1821 } else if (!strcmp(this_char, "osyncisdsync")) { 1832 } else if (!strcmp(this_char, "osyncisdsync")) {
1822 /* no-op, this is now the default */ 1833 /* no-op, this is now the default */
1823printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); 1834printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
@@ -1892,7 +1903,6 @@ xfs_showargs(
1892 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, 1903 { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
1893 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, 1904 { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
1894 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, 1905 { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
1895 { XFS_MOUNT_BARRIER, "," MNTOPT_BARRIER },
1896 { XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP }, 1906 { XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP },
1897 { 0, NULL } 1907 { 0, NULL }
1898 }; 1908 };
@@ -1914,33 +1924,28 @@ xfs_showargs(
1914 1924
1915 if (mp->m_logbufs > 0) 1925 if (mp->m_logbufs > 0)
1916 seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); 1926 seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
1917
1918 if (mp->m_logbsize > 0) 1927 if (mp->m_logbsize > 0)
1919 seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); 1928 seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
1920 1929
1921 if (mp->m_logname) 1930 if (mp->m_logname)
1922 seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); 1931 seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
1923
1924 if (mp->m_rtname) 1932 if (mp->m_rtname)
1925 seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); 1933 seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
1926 1934
1927 if (mp->m_dalign > 0) 1935 if (mp->m_dalign > 0)
1928 seq_printf(m, "," MNTOPT_SUNIT "=%d", 1936 seq_printf(m, "," MNTOPT_SUNIT "=%d",
1929 (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); 1937 (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
1930
1931 if (mp->m_swidth > 0) 1938 if (mp->m_swidth > 0)
1932 seq_printf(m, "," MNTOPT_SWIDTH "=%d", 1939 seq_printf(m, "," MNTOPT_SWIDTH "=%d",
1933 (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); 1940 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
1934 1941
1935 if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
1936 seq_printf(m, "," MNTOPT_ATTR2);
1937
1938 if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) 1942 if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
1939 seq_printf(m, "," MNTOPT_LARGEIO); 1943 seq_printf(m, "," MNTOPT_LARGEIO);
1944 if (mp->m_flags & XFS_MOUNT_BARRIER)
1945 seq_printf(m, "," MNTOPT_BARRIER);
1940 1946
1941 if (!(vfsp->vfs_flag & VFS_32BITINODES)) 1947 if (!(vfsp->vfs_flag & VFS_32BITINODES))
1942 seq_printf(m, "," MNTOPT_64BITINODE); 1948 seq_printf(m, "," MNTOPT_64BITINODE);
1943
1944 if (vfsp->vfs_flag & VFS_GRPID) 1949 if (vfsp->vfs_flag & VFS_GRPID)
1945 seq_printf(m, "," MNTOPT_GRPID); 1950 seq_printf(m, "," MNTOPT_GRPID);
1946 1951
@@ -1959,6 +1964,7 @@ xfs_freeze(
1959 /* Push the superblock and write an unmount record */ 1964 /* Push the superblock and write an unmount record */
1960 xfs_log_unmount_write(mp); 1965 xfs_log_unmount_write(mp);
1961 xfs_unmountfs_writesb(mp); 1966 xfs_unmountfs_writesb(mp);
1967 xfs_fs_log_dummy(mp);
1962} 1968}
1963 1969
1964 1970
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e03fa2a3d5ed..eaab355f5a89 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -15,6 +15,9 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
19#include <linux/capability.h>
20
18#include "xfs.h" 21#include "xfs.h"
19#include "xfs_fs.h" 22#include "xfs_fs.h"
20#include "xfs_types.h" 23#include "xfs_types.h"
@@ -182,8 +185,7 @@ xfs_getattr(
182 break; 185 break;
183 } 186 }
184 187
185 vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec; 188 vn_atime_to_timespec(vp, &vap->va_atime);
186 vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
187 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; 189 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
188 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 190 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
189 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 191 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
@@ -336,7 +338,7 @@ xfs_setattr(
336 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 338 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
337 &udqp, &gdqp); 339 &udqp, &gdqp);
338 if (code) 340 if (code)
339 return (code); 341 return code;
340 } 342 }
341 343
342 /* 344 /*
@@ -541,24 +543,6 @@ xfs_setattr(
541 } 543 }
542 544
543 /* 545 /*
544 * Can't set extent size unless the file is marked, or
545 * about to be marked as a realtime file.
546 *
547 * This check will be removed when fixed size extents
548 * with buffered data writes is implemented.
549 *
550 */
551 if ((mask & XFS_AT_EXTSIZE) &&
552 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
553 vap->va_extsize) &&
554 (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
555 ((mask & XFS_AT_XFLAGS) &&
556 (vap->va_xflags & XFS_XFLAG_REALTIME))))) {
557 code = XFS_ERROR(EINVAL);
558 goto error_return;
559 }
560
561 /*
562 * Can't change realtime flag if any extents are allocated. 546 * Can't change realtime flag if any extents are allocated.
563 */ 547 */
564 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 548 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
@@ -820,13 +804,17 @@ xfs_setattr(
820 di_flags |= XFS_DIFLAG_RTINHERIT; 804 di_flags |= XFS_DIFLAG_RTINHERIT;
821 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) 805 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
822 di_flags |= XFS_DIFLAG_NOSYMLINKS; 806 di_flags |= XFS_DIFLAG_NOSYMLINKS;
823 } else { 807 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
808 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
809 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
824 if (vap->va_xflags & XFS_XFLAG_REALTIME) { 810 if (vap->va_xflags & XFS_XFLAG_REALTIME) {
825 di_flags |= XFS_DIFLAG_REALTIME; 811 di_flags |= XFS_DIFLAG_REALTIME;
826 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 812 ip->i_iocore.io_flags |= XFS_IOCORE_RT;
827 } else { 813 } else {
828 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; 814 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
829 } 815 }
816 if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
817 di_flags |= XFS_DIFLAG_EXTSIZE;
830 } 818 }
831 ip->i_d.di_flags = di_flags; 819 ip->i_d.di_flags = di_flags;
832 } 820 }
@@ -996,10 +984,6 @@ xfs_readlink(
996 goto error_return; 984 goto error_return;
997 } 985 }
998 986
999 if (!(ioflags & IO_INVIS)) {
1000 xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
1001 }
1002
1003 /* 987 /*
1004 * See if the symlink is stored inline. 988 * See if the symlink is stored inline.
1005 */ 989 */
@@ -1043,11 +1027,8 @@ xfs_readlink(
1043 1027
1044 } 1028 }
1045 1029
1046
1047error_return: 1030error_return:
1048
1049 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1031 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1050
1051 return error; 1032 return error;
1052} 1033}
1053 1034
@@ -1222,7 +1203,7 @@ xfs_inactive_free_eofblocks(
1222 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1203 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1223 map_len = last_fsb - end_fsb; 1204 map_len = last_fsb - end_fsb;
1224 if (map_len <= 0) 1205 if (map_len <= 0)
1225 return (0); 1206 return 0;
1226 1207
1227 nimaps = 1; 1208 nimaps = 1;
1228 xfs_ilock(ip, XFS_ILOCK_SHARED); 1209 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1231,12 +1212,13 @@ xfs_inactive_free_eofblocks(
1231 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1212 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1232 1213
1233 if (!error && (nimaps != 0) && 1214 if (!error && (nimaps != 0) &&
1234 (imap.br_startblock != HOLESTARTBLOCK)) { 1215 (imap.br_startblock != HOLESTARTBLOCK ||
1216 ip->i_delayed_blks)) {
1235 /* 1217 /*
1236 * Attach the dquots to the inode up front. 1218 * Attach the dquots to the inode up front.
1237 */ 1219 */
1238 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1220 if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
1239 return (error); 1221 return error;
1240 1222
1241 /* 1223 /*
1242 * There are blocks after the end of file. 1224 * There are blocks after the end of file.
@@ -1264,7 +1246,7 @@ xfs_inactive_free_eofblocks(
1264 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1246 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1265 xfs_trans_cancel(tp, 0); 1247 xfs_trans_cancel(tp, 0);
1266 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1248 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1267 return (error); 1249 return error;
1268 } 1250 }
1269 1251
1270 xfs_ilock(ip, XFS_ILOCK_EXCL); 1252 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1292,7 +1274,7 @@ xfs_inactive_free_eofblocks(
1292 } 1274 }
1293 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1275 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1294 } 1276 }
1295 return (error); 1277 return error;
1296} 1278}
1297 1279
1298/* 1280/*
@@ -1470,7 +1452,7 @@ xfs_inactive_symlink_local(
1470 if (error) { 1452 if (error) {
1471 xfs_trans_cancel(*tpp, 0); 1453 xfs_trans_cancel(*tpp, 0);
1472 *tpp = NULL; 1454 *tpp = NULL;
1473 return (error); 1455 return error;
1474 } 1456 }
1475 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1457 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1476 1458
@@ -1483,7 +1465,7 @@ xfs_inactive_symlink_local(
1483 XFS_DATA_FORK); 1465 XFS_DATA_FORK);
1484 ASSERT(ip->i_df.if_bytes == 0); 1466 ASSERT(ip->i_df.if_bytes == 0);
1485 } 1467 }
1486 return (0); 1468 return 0;
1487} 1469}
1488 1470
1489/* 1471/*
@@ -1509,7 +1491,7 @@ xfs_inactive_attrs(
1509 if (error) { 1491 if (error) {
1510 *tpp = NULL; 1492 *tpp = NULL;
1511 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1493 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1512 return (error); /* goto out*/ 1494 return error; /* goto out */
1513 } 1495 }
1514 1496
1515 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1497 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1522,7 +1504,7 @@ xfs_inactive_attrs(
1522 xfs_trans_cancel(tp, 0); 1504 xfs_trans_cancel(tp, 0);
1523 *tpp = NULL; 1505 *tpp = NULL;
1524 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1506 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1525 return (error); 1507 return error;
1526 } 1508 }
1527 1509
1528 xfs_ilock(ip, XFS_ILOCK_EXCL); 1510 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1533,7 +1515,7 @@ xfs_inactive_attrs(
1533 ASSERT(ip->i_d.di_anextents == 0); 1515 ASSERT(ip->i_d.di_anextents == 0);
1534 1516
1535 *tpp = tp; 1517 *tpp = tp;
1536 return (0); 1518 return 0;
1537} 1519}
1538 1520
1539STATIC int 1521STATIC int
@@ -1566,11 +1548,13 @@ xfs_release(
1566 1548
1567 if (ip->i_d.di_nlink != 0) { 1549 if (ip->i_d.di_nlink != 0) {
1568 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1550 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1569 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) && 1551 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
1552 ip->i_delayed_blks > 0)) &&
1570 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1553 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1571 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) { 1554 (!(ip->i_d.di_flags &
1555 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1572 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1556 if ((error = xfs_inactive_free_eofblocks(mp, ip)))
1573 return (error); 1557 return error;
1574 /* Update linux inode block count after free above */ 1558 /* Update linux inode block count after free above */
1575 LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1559 LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
1576 ip->i_d.di_nblocks + ip->i_delayed_blks); 1560 ip->i_d.di_nblocks + ip->i_delayed_blks);
@@ -1625,7 +1609,8 @@ xfs_inactive(
1625 * only one with a reference to the inode. 1609 * only one with a reference to the inode.
1626 */ 1610 */
1627 truncate = ((ip->i_d.di_nlink == 0) && 1611 truncate = ((ip->i_d.di_nlink == 0) &&
1628 ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) && 1612 ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
1613 (ip->i_delayed_blks > 0)) &&
1629 ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); 1614 ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
1630 1615
1631 mp = ip->i_mount; 1616 mp = ip->i_mount;
@@ -1643,12 +1628,14 @@ xfs_inactive(
1643 1628
1644 if (ip->i_d.di_nlink != 0) { 1629 if (ip->i_d.di_nlink != 0) {
1645 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1630 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1646 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) && 1631 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
1647 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1632 ip->i_delayed_blks > 0)) &&
1648 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) || 1633 (ip->i_df.if_flags & XFS_IFEXTENTS) &&
1649 (ip->i_delayed_blks != 0))) { 1634 (!(ip->i_d.di_flags &
1635 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
1636 (ip->i_delayed_blks != 0)))) {
1650 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1637 if ((error = xfs_inactive_free_eofblocks(mp, ip)))
1651 return (VN_INACTIVE_CACHE); 1638 return VN_INACTIVE_CACHE;
1652 /* Update linux inode block count after free above */ 1639 /* Update linux inode block count after free above */
1653 LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1640 LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
1654 ip->i_d.di_nblocks + ip->i_delayed_blks); 1641 ip->i_d.di_nblocks + ip->i_delayed_blks);
@@ -1659,7 +1646,7 @@ xfs_inactive(
1659 ASSERT(ip->i_d.di_nlink == 0); 1646 ASSERT(ip->i_d.di_nlink == 0);
1660 1647
1661 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1648 if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
1662 return (VN_INACTIVE_CACHE); 1649 return VN_INACTIVE_CACHE;
1663 1650
1664 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1651 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1665 if (truncate) { 1652 if (truncate) {
@@ -1682,7 +1669,7 @@ xfs_inactive(
1682 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1669 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1683 xfs_trans_cancel(tp, 0); 1670 xfs_trans_cancel(tp, 0);
1684 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1671 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1685 return (VN_INACTIVE_CACHE); 1672 return VN_INACTIVE_CACHE;
1686 } 1673 }
1687 1674
1688 xfs_ilock(ip, XFS_ILOCK_EXCL); 1675 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1703,7 +1690,7 @@ xfs_inactive(
1703 xfs_trans_cancel(tp, 1690 xfs_trans_cancel(tp,
1704 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1691 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1705 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1692 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1706 return (VN_INACTIVE_CACHE); 1693 return VN_INACTIVE_CACHE;
1707 } 1694 }
1708 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) { 1695 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
1709 1696
@@ -1717,7 +1704,7 @@ xfs_inactive(
1717 1704
1718 if (error) { 1705 if (error) {
1719 ASSERT(tp == NULL); 1706 ASSERT(tp == NULL);
1720 return (VN_INACTIVE_CACHE); 1707 return VN_INACTIVE_CACHE;
1721 } 1708 }
1722 1709
1723 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1710 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
@@ -1730,7 +1717,7 @@ xfs_inactive(
1730 if (error) { 1717 if (error) {
1731 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1718 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1732 xfs_trans_cancel(tp, 0); 1719 xfs_trans_cancel(tp, 0);
1733 return (VN_INACTIVE_CACHE); 1720 return VN_INACTIVE_CACHE;
1734 } 1721 }
1735 1722
1736 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1723 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -1752,7 +1739,7 @@ xfs_inactive(
1752 * cancelled, and the inode is unlocked. Just get out. 1739 * cancelled, and the inode is unlocked. Just get out.
1753 */ 1740 */
1754 if (error) 1741 if (error)
1755 return (VN_INACTIVE_CACHE); 1742 return VN_INACTIVE_CACHE;
1756 } else if (ip->i_afp) { 1743 } else if (ip->i_afp) {
1757 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1744 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
1758 } 1745 }
@@ -2059,8 +2046,8 @@ std_return:
2059 abort_return: 2046 abort_return:
2060 cancel_flags |= XFS_TRANS_ABORT; 2047 cancel_flags |= XFS_TRANS_ABORT;
2061 /* FALLTHROUGH */ 2048 /* FALLTHROUGH */
2062 error_return:
2063 2049
2050 error_return:
2064 if (tp != NULL) 2051 if (tp != NULL)
2065 xfs_trans_cancel(tp, cancel_flags); 2052 xfs_trans_cancel(tp, cancel_flags);
2066 2053
@@ -2590,7 +2577,6 @@ xfs_link(
2590 int cancel_flags; 2577 int cancel_flags;
2591 int committed; 2578 int committed;
2592 vnode_t *target_dir_vp; 2579 vnode_t *target_dir_vp;
2593 bhv_desc_t *src_bdp;
2594 int resblks; 2580 int resblks;
2595 char *target_name = VNAME(dentry); 2581 char *target_name = VNAME(dentry);
2596 int target_namelen; 2582 int target_namelen;
@@ -2603,8 +2589,7 @@ xfs_link(
2603 if (VN_ISDIR(src_vp)) 2589 if (VN_ISDIR(src_vp))
2604 return XFS_ERROR(EPERM); 2590 return XFS_ERROR(EPERM);
2605 2591
2606 src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops); 2592 sip = xfs_vtoi(src_vp);
2607 sip = XFS_BHVTOI(src_bdp);
2608 tdp = XFS_BHVTOI(target_dir_bdp); 2593 tdp = XFS_BHVTOI(target_dir_bdp);
2609 mp = tdp->i_mount; 2594 mp = tdp->i_mount;
2610 if (XFS_FORCED_SHUTDOWN(mp)) 2595 if (XFS_FORCED_SHUTDOWN(mp))
@@ -2736,9 +2721,9 @@ std_return:
2736 abort_return: 2721 abort_return:
2737 cancel_flags |= XFS_TRANS_ABORT; 2722 cancel_flags |= XFS_TRANS_ABORT;
2738 /* FALLTHROUGH */ 2723 /* FALLTHROUGH */
2724
2739 error_return: 2725 error_return:
2740 xfs_trans_cancel(tp, cancel_flags); 2726 xfs_trans_cancel(tp, cancel_flags);
2741
2742 goto std_return; 2727 goto std_return;
2743} 2728}
2744/* 2729/*
@@ -3211,10 +3196,12 @@ std_return:
3211 } 3196 }
3212 return error; 3197 return error;
3213 3198
3214 error1: 3199error1:
3215 xfs_bmap_cancel(&free_list); 3200 xfs_bmap_cancel(&free_list);
3216 cancel_flags |= XFS_TRANS_ABORT; 3201 cancel_flags |= XFS_TRANS_ABORT;
3217 error_return: 3202 /* FALLTHROUGH */
3203
3204error_return:
3218 xfs_trans_cancel(tp, cancel_flags); 3205 xfs_trans_cancel(tp, cancel_flags);
3219 goto std_return; 3206 goto std_return;
3220} 3207}
@@ -3237,7 +3224,6 @@ xfs_readdir(
3237 xfs_trans_t *tp = NULL; 3224 xfs_trans_t *tp = NULL;
3238 int error = 0; 3225 int error = 0;
3239 uint lock_mode; 3226 uint lock_mode;
3240 xfs_off_t start_offset;
3241 3227
3242 vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, 3228 vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
3243 (inst_t *)__return_address); 3229 (inst_t *)__return_address);
@@ -3248,11 +3234,7 @@ xfs_readdir(
3248 } 3234 }
3249 3235
3250 lock_mode = xfs_ilock_map_shared(dp); 3236 lock_mode = xfs_ilock_map_shared(dp);
3251 start_offset = uiop->uio_offset;
3252 error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); 3237 error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
3253 if (start_offset != uiop->uio_offset) {
3254 xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
3255 }
3256 xfs_iunlock_map_shared(dp, lock_mode); 3238 xfs_iunlock_map_shared(dp, lock_mode);
3257 return error; 3239 return error;
3258} 3240}
@@ -3635,9 +3617,9 @@ xfs_rwlock(
3635 if (locktype == VRWLOCK_WRITE) { 3617 if (locktype == VRWLOCK_WRITE) {
3636 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3618 xfs_ilock(ip, XFS_IOLOCK_EXCL);
3637 } else if (locktype == VRWLOCK_TRY_READ) { 3619 } else if (locktype == VRWLOCK_TRY_READ) {
3638 return (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)); 3620 return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED);
3639 } else if (locktype == VRWLOCK_TRY_WRITE) { 3621 } else if (locktype == VRWLOCK_TRY_WRITE) {
3640 return (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)); 3622 return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL);
3641 } else { 3623 } else {
3642 ASSERT((locktype == VRWLOCK_READ) || 3624 ASSERT((locktype == VRWLOCK_READ) ||
3643 (locktype == VRWLOCK_WRITE_DIRECT)); 3625 (locktype == VRWLOCK_WRITE_DIRECT));
@@ -3829,7 +3811,12 @@ xfs_reclaim(
3829 vn_iowait(vp); 3811 vn_iowait(vp);
3830 3812
3831 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 3813 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
3832 ASSERT(VN_CACHED(vp) == 0); 3814
3815 /*
3816 * Make sure the atime in the XFS inode is correct before freeing the
3817 * Linux inode.
3818 */
3819 xfs_synchronize_atime(ip);
3833 3820
3834 /* If we have nothing to flush with this inode then complete the 3821 /* If we have nothing to flush with this inode then complete the
3835 * teardown now, otherwise break the link between the xfs inode 3822 * teardown now, otherwise break the link between the xfs inode
@@ -3880,7 +3867,7 @@ xfs_finish_reclaim(
3880 xfs_ifunlock(ip); 3867 xfs_ifunlock(ip);
3881 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3868 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3882 } 3869 }
3883 return(1); 3870 return 1;
3884 } 3871 }
3885 ip->i_flags |= XFS_IRECLAIM; 3872 ip->i_flags |= XFS_IRECLAIM;
3886 write_unlock(&ih->ih_lock); 3873 write_unlock(&ih->ih_lock);
@@ -3999,42 +3986,36 @@ xfs_alloc_file_space(
3999 int alloc_type, 3986 int alloc_type,
4000 int attr_flags) 3987 int attr_flags)
4001{ 3988{
3989 xfs_mount_t *mp = ip->i_mount;
3990 xfs_off_t count;
4002 xfs_filblks_t allocated_fsb; 3991 xfs_filblks_t allocated_fsb;
4003 xfs_filblks_t allocatesize_fsb; 3992 xfs_filblks_t allocatesize_fsb;
4004 int committed; 3993 xfs_extlen_t extsz, temp;
4005 xfs_off_t count; 3994 xfs_fileoff_t startoffset_fsb;
4006 xfs_filblks_t datablocks;
4007 int error;
4008 xfs_fsblock_t firstfsb; 3995 xfs_fsblock_t firstfsb;
4009 xfs_bmap_free_t free_list; 3996 int nimaps;
4010 xfs_bmbt_irec_t *imapp; 3997 int bmapi_flag;
4011 xfs_bmbt_irec_t imaps[1]; 3998 int quota_flag;
4012 xfs_mount_t *mp;
4013 int numrtextents;
4014 int reccount;
4015 uint resblks;
4016 int rt; 3999 int rt;
4017 int rtextsize;
4018 xfs_fileoff_t startoffset_fsb;
4019 xfs_trans_t *tp; 4000 xfs_trans_t *tp;
4020 int xfs_bmapi_flags; 4001 xfs_bmbt_irec_t imaps[1], *imapp;
4002 xfs_bmap_free_t free_list;
4003 uint qblocks, resblks, resrtextents;
4004 int committed;
4005 int error;
4021 4006
4022 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 4007 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
4023 mp = ip->i_mount;
4024 4008
4025 if (XFS_FORCED_SHUTDOWN(mp)) 4009 if (XFS_FORCED_SHUTDOWN(mp))
4026 return XFS_ERROR(EIO); 4010 return XFS_ERROR(EIO);
4027 4011
4028 /* 4012 rt = XFS_IS_REALTIME_INODE(ip);
4029 * determine if this is a realtime file 4013 if (unlikely(rt)) {
4030 */ 4014 if (!(extsz = ip->i_d.di_extsize))
4031 if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) { 4015 extsz = mp->m_sb.sb_rextsize;
4032 if (ip->i_d.di_extsize) 4016 } else {
4033 rtextsize = ip->i_d.di_extsize; 4017 extsz = ip->i_d.di_extsize;
4034 else 4018 }
4035 rtextsize = mp->m_sb.sb_rextsize;
4036 } else
4037 rtextsize = 0;
4038 4019
4039 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4020 if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
4040 return error; 4021 return error;
@@ -4045,8 +4026,8 @@ xfs_alloc_file_space(
4045 count = len; 4026 count = len;
4046 error = 0; 4027 error = 0;
4047 imapp = &imaps[0]; 4028 imapp = &imaps[0];
4048 reccount = 1; 4029 nimaps = 1;
4049 xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 4030 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
4050 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 4031 startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
4051 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 4032 allocatesize_fsb = XFS_B_TO_FSB(mp, count);
4052 4033
@@ -4063,47 +4044,55 @@ xfs_alloc_file_space(
4063 offset, end_dmi_offset - offset, 4044 offset, end_dmi_offset - offset,
4064 0, NULL); 4045 0, NULL);
4065 if (error) 4046 if (error)
4066 return(error); 4047 return error;
4067 } 4048 }
4068 4049
4069 /* 4050 /*
4070 * allocate file space until done or until there is an error 4051 * Allocate file space until done or until there is an error
4071 */ 4052 */
4072retry: 4053retry:
4073 while (allocatesize_fsb && !error) { 4054 while (allocatesize_fsb && !error) {
4055 xfs_fileoff_t s, e;
4056
4074 /* 4057 /*
4075 * determine if reserving space on 4058 * Determine space reservations for data/realtime.
4076 * the data or realtime partition.
4077 */ 4059 */
4078 if (rt) { 4060 if (unlikely(extsz)) {
4079 xfs_fileoff_t s, e;
4080
4081 s = startoffset_fsb; 4061 s = startoffset_fsb;
4082 do_div(s, rtextsize); 4062 do_div(s, extsz);
4083 s *= rtextsize; 4063 s *= extsz;
4084 e = roundup_64(startoffset_fsb + allocatesize_fsb, 4064 e = startoffset_fsb + allocatesize_fsb;
4085 rtextsize); 4065 if ((temp = do_mod(startoffset_fsb, extsz)))
4086 numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize; 4066 e += temp;
4087 datablocks = 0; 4067 if ((temp = do_mod(e, extsz)))
4068 e += extsz - temp;
4088 } else { 4069 } else {
4089 datablocks = allocatesize_fsb; 4070 s = 0;
4090 numrtextents = 0; 4071 e = allocatesize_fsb;
4072 }
4073
4074 if (unlikely(rt)) {
4075 resrtextents = qblocks = (uint)(e - s);
4076 resrtextents /= mp->m_sb.sb_rextsize;
4077 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
4078 quota_flag = XFS_QMOPT_RES_RTBLKS;
4079 } else {
4080 resrtextents = 0;
4081 resblks = qblocks = \
4082 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
4083 quota_flag = XFS_QMOPT_RES_REGBLKS;
4091 } 4084 }
4092 4085
4093 /* 4086 /*
4094 * allocate and setup the transaction 4087 * Allocate and setup the transaction.
4095 */ 4088 */
4096 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4089 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
4097 resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); 4090 error = xfs_trans_reserve(tp, resblks,
4098 error = xfs_trans_reserve(tp, 4091 XFS_WRITE_LOG_RES(mp), resrtextents,
4099 resblks,
4100 XFS_WRITE_LOG_RES(mp),
4101 numrtextents,
4102 XFS_TRANS_PERM_LOG_RES, 4092 XFS_TRANS_PERM_LOG_RES,
4103 XFS_WRITE_LOG_COUNT); 4093 XFS_WRITE_LOG_COUNT);
4104
4105 /* 4094 /*
4106 * check for running out of space 4095 * Check for running out of space
4107 */ 4096 */
4108 if (error) { 4097 if (error) {
4109 /* 4098 /*
@@ -4114,8 +4103,8 @@ retry:
4114 break; 4103 break;
4115 } 4104 }
4116 xfs_ilock(ip, XFS_ILOCK_EXCL); 4105 xfs_ilock(ip, XFS_ILOCK_EXCL);
4117 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 4106 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
4118 ip->i_udquot, ip->i_gdquot, resblks, 0, 0); 4107 qblocks, 0, quota_flag);
4119 if (error) 4108 if (error)
4120 goto error1; 4109 goto error1;
4121 4110
@@ -4123,19 +4112,19 @@ retry:
4123 xfs_trans_ihold(tp, ip); 4112 xfs_trans_ihold(tp, ip);
4124 4113
4125 /* 4114 /*
4126 * issue the bmapi() call to allocate the blocks 4115 * Issue the xfs_bmapi() call to allocate the blocks
4127 */ 4116 */
4128 XFS_BMAP_INIT(&free_list, &firstfsb); 4117 XFS_BMAP_INIT(&free_list, &firstfsb);
4129 error = xfs_bmapi(tp, ip, startoffset_fsb, 4118 error = xfs_bmapi(tp, ip, startoffset_fsb,
4130 allocatesize_fsb, xfs_bmapi_flags, 4119 allocatesize_fsb, bmapi_flag,
4131 &firstfsb, 0, imapp, &reccount, 4120 &firstfsb, 0, imapp, &nimaps,
4132 &free_list); 4121 &free_list);
4133 if (error) { 4122 if (error) {
4134 goto error0; 4123 goto error0;
4135 } 4124 }
4136 4125
4137 /* 4126 /*
4138 * complete the transaction 4127 * Complete the transaction
4139 */ 4128 */
4140 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4129 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
4141 if (error) { 4130 if (error) {
@@ -4150,7 +4139,7 @@ retry:
4150 4139
4151 allocated_fsb = imapp->br_blockcount; 4140 allocated_fsb = imapp->br_blockcount;
4152 4141
4153 if (reccount == 0) { 4142 if (nimaps == 0) {
4154 error = XFS_ERROR(ENOSPC); 4143 error = XFS_ERROR(ENOSPC);
4155 break; 4144 break;
4156 } 4145 }
@@ -4173,9 +4162,11 @@ dmapi_enospc_check:
4173 4162
4174 return error; 4163 return error;
4175 4164
4176 error0: 4165error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
4177 xfs_bmap_cancel(&free_list); 4166 xfs_bmap_cancel(&free_list);
4178 error1: 4167 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
4168
4169error1: /* Just cancel transaction */
4179 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4170 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
4180 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4171 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4181 goto dmapi_enospc_check; 4172 goto dmapi_enospc_check;
@@ -4313,7 +4304,7 @@ xfs_free_file_space(
4313 offset, end_dmi_offset - offset, 4304 offset, end_dmi_offset - offset,
4314 AT_DELAY_FLAG(attr_flags), NULL); 4305 AT_DELAY_FLAG(attr_flags), NULL);
4315 if (error) 4306 if (error)
4316 return(error); 4307 return error;
4317 } 4308 }
4318 4309
4319 ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1); 4310 ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1);
@@ -4420,8 +4411,8 @@ xfs_free_file_space(
4420 } 4411 }
4421 xfs_ilock(ip, XFS_ILOCK_EXCL); 4412 xfs_ilock(ip, XFS_ILOCK_EXCL);
4422 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 4413 error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
4423 ip->i_udquot, ip->i_gdquot, resblks, 0, rt ? 4414 ip->i_udquot, ip->i_gdquot, resblks, 0,
4424 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 4415 XFS_QMOPT_RES_REGBLKS);
4425 if (error) 4416 if (error)
4426 goto error1; 4417 goto error1;
4427 4418