about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author Dmitry Torokhov <dmitry.torokhov@gmail.com> 2010-03-01 02:55:20 -0500
committer Dmitry Torokhov <dmitry.torokhov@gmail.com> 2010-03-01 02:55:20 -0500
commit 35858adbfca13678af99fb31618ef4428d6dedb0 (patch)
tree 3336feaa61324486945816cb52c347733e7c0821 /fs
parent 197d4db752e67160d79fed09968c2140376a80a3 (diff)
parent 4b70858ba8d4537daf782defebe5f2ff80ccef2b (diff)
Merge branch 'next' into for-linus
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c41
-rw-r--r--fs/aio.c40
-rw-r--r--fs/anon_inodes.c35
-rw-r--r--fs/autofs4/autofs_i.h38
-rw-r--r--fs/autofs4/expire.c8
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c616
-rw-r--r--fs/binfmt_aout.c13
-rw-r--r--fs/binfmt_elf.c35
-rw-r--r--fs/binfmt_elf_fdpic.c57
-rw-r--r--fs/binfmt_flat.c6
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/bio.c2
-rw-r--r--fs/btrfs/acl.c80
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c229
-rw-r--r--fs/btrfs/ctree.h40
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/extent-tree.c104
-rw-r--r--fs/btrfs/file.c725
-rw-r--r--fs/btrfs/inode.c579
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/ordered-data.c117
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c42
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/btrfs/transaction.c44
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c86
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
-rw-r--r--fs/cachefiles/bind.c11
-rw-r--r--fs/cachefiles/daemon.c4
-rw-r--r--fs/cachefiles/rdwr.c2
-rw-r--r--fs/cifs/CHANGES4
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/connect.c13
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c5
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/direct-io.c165
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c164
-rw-r--r--fs/ecryptfs/main.c13
-rw-r--r--fs/eventfd.c2
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c49
-rw-r--r--fs/exofs/inode.c17
-rw-r--r--fs/exofs/pnfs.h10
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/acl.c79
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/file.c21
-rw-r--r--fs/ext2/super.c22
-rw-r--r--fs/ext2/xattr.c11
-rw-r--r--fs/ext2/xattr_security.c16
-rw-r--r--fs/ext2/xattr_trusted.c16
-rw-r--r--fs/ext2/xattr_user.c25
-rw-r--r--fs/ext3/acl.c74
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext3/namei.c28
-rw-r--r--fs/ext3/resize.c35
-rw-r--r--fs/ext3/super.c19
-rw-r--r--fs/ext3/xattr.c31
-rw-r--r--fs/ext3/xattr_security.c20
-rw-r--r--fs/ext3/xattr_trusted.c18
-rw-r--r--fs/ext3/xattr_user.c25
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/acl.c74
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/ext4.h8
-rw-r--r--fs/ext4/ext4_extents.h3
-rw-r--r--fs/ext4/extents.c77
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inode.c235
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c19
-rw-r--r--fs/ext4/xattr.c33
-rw-r--r--fs/ext4/xattr_security.c20
-rw-r--r--fs/ext4/xattr_trusted.c20
-rw-r--r--fs/ext4/xattr_user.c25
-rw-r--r--fs/fat/fat.h3
-rw-r--r--fs/fat/fatent.c25
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fat/misc.c57
-rw-r--r--fs/fcntl.c102
-rw-r--r--fs/file_table.c50
-rw-r--r--fs/fs-writeback.c18
-rw-r--r--fs/fscache/object-list.c2
-rw-r--r--fs/generic_acl.c158
-rw-r--r--fs/gfs2/acl.c16
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/inode.c5
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_inode.c6
-rw-r--r--fs/gfs2/sys.c16
-rw-r--r--fs/gfs2/xattr.c90
-rw-r--r--fs/gfs2/xattr.h7
-rw-r--r--fs/hfs/catalog.c4
-rw-r--r--fs/hfs/dir.c11
-rw-r--r--fs/hfs/super.c7
-rw-r--r--fs/hpfs/super.c17
-rw-r--r--fs/hppfs/hppfs.c18
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c26
-rw-r--r--fs/internal.h8
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/jffs2/acl.c65
-rw-r--r--fs/jffs2/gc.c3
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jffs2/security.c18
-rw-r--r--fs/jffs2/summary.c2
-rw-r--r--fs/jffs2/xattr.c6
-rw-r--r--fs/jffs2/xattr_trusted.c18
-rw-r--r--fs/jffs2/xattr_user.c18
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/namei.c474
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfs/nfs4_fs.h5
-rw-r--r--fs/nfs/nfs4proc.c203
-rw-r--r--fs/nfs/nfs4state.c60
-rw-r--r--fs/nfsctl.c2
-rw-r--r--fs/nfsd/auth.c12
-rw-r--r--fs/nfsd/cache.h83
-rw-r--r--fs/nfsd/export.c65
-rw-r--r--fs/nfsd/lockd.c10
-rw-r--r--fs/nfsd/nfs2acl.c27
-rw-r--r--fs/nfsd/nfs3acl.c15
-rw-r--r--fs/nfsd/nfs3proc.c20
-rw-r--r--fs/nfsd/nfs3xdr.c15
-rw-r--r--fs/nfsd/nfs4acl.c12
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c17
-rw-r--r--fs/nfsd/nfs4proc.c19
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c84
-rw-r--r--fs/nfsd/nfs4xdr.c26
-rw-r--r--fs/nfsd/nfscache.c14
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsd.h338
-rw-r--r--fs/nfsd/nfsfh.c102
-rw-r--r--fs/nfsd/nfsfh.h208
-rw-r--r--fs/nfsd/nfsproc.c22
-rw-r--r--fs/nfsd/nfssvc.c22
-rw-r--r--fs/nfsd/nfsxdr.c12
-rw-r--r--fs/nfsd/state.h408
-rw-r--r--fs/nfsd/stats.c11
-rw-r--r--fs/nfsd/vfs.c144
-rw-r--r--fs/nfsd/vfs.h101
-rw-r--r--fs/nfsd/xdr.h173
-rw-r--r--fs/nfsd/xdr3.h344
-rw-r--r--fs/nfsd/xdr4.h562
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/cpfile.c31
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/super.c3
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c33
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ocfs2/Kconfig10
-rw-r--r--fs/ocfs2/Makefile7
-rw-r--r--fs/ocfs2/acl.c91
-rw-r--r--fs/ocfs2/acl.h22
-rw-r--r--fs/ocfs2/alloc.c14
-rw-r--r--fs/ocfs2/alloc.h5
-rw-r--r--fs/ocfs2/aops.c34
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/cluster/nodemanager.c51
-rw-r--r--fs/ocfs2/cluster/nodemanager.h7
-rw-r--r--fs/ocfs2/cluster/quorum.c16
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c18
-rw-r--r--fs/ocfs2/extent_map.c25
-rw-r--r--fs/ocfs2/file.c21
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/ocfs2.h8
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/ocfs2/refcounttree.c150
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/super.c95
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c78
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/open.c17
-rw-r--r--fs/pipe.c45
-rw-r--r--fs/proc/array.c108
-rw-r--r--fs/proc/base.c73
-rw-r--r--fs/proc/generic.c21
-rw-r--r--fs/proc/inode.c31
-rw-r--r--fs/proc/internal.h10
-rw-r--r--fs/proc/page.c45
-rw-r--r--fs/proc/task_mmu.c48
-rw-r--r--fs/proc/task_nommu.c8
-rw-r--r--fs/qnx4/bitmap.c24
-rw-r--r--fs/qnx4/inode.c22
-rw-r--r--fs/quota/dquot.c291
-rw-r--r--fs/quota/quota_v2.c9
-rw-r--r--fs/ramfs/file-nommu.c28
-rw-r--r--fs/reiserfs/Makefile6
-rw-r--r--fs/reiserfs/bitmap.c3
-rw-r--r--fs/reiserfs/inode.c42
-rw-r--r--fs/reiserfs/ioctl.c3
-rw-r--r--fs/reiserfs/journal.c18
-rw-r--r--fs/reiserfs/lock.c9
-rw-r--r--fs/reiserfs/namei.c7
-rw-r--r--fs/reiserfs/procfs.c65
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/reiserfs/xattr.c74
-rw-r--r--fs/reiserfs/xattr_acl.c71
-rw-r--r--fs/reiserfs/xattr_security.c21
-rw-r--r--fs/reiserfs/xattr_trusted.c21
-rw-r--r--fs/reiserfs/xattr_user.c21
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/stack.c71
-rw-r--r--fs/stat.c10
-rw-r--r--fs/super.c3
-rw-r--r--fs/sync.c59
-rw-r--r--fs/sysfs/bin.c6
-rw-r--r--fs/sysfs/dir.c14
-rw-r--r--fs/sysfs/sysfs.h15
-rw-r--r--fs/timerfd.c2
-rw-r--r--fs/ubifs/debug.c9
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ubifs/gc.c96
-rw-r--r--fs/ubifs/super.c7
-rw-r--r--fs/ufs/dir.c10
-rw-r--r--fs/ufs/namei.c8
-rw-r--r--fs/ufs/super.c52
-rw-r--r--fs/ufs/ufs.h4
-rw-r--r--fs/xattr.c28
-rw-r--r--fs/xfs/Makefile8
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c61
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c72
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c127
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h34
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c87
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h45
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c118
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c184
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c75
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1422
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c71
-rw-r--r--fs/xfs/quota/xfs_dquot.c110
-rw-r--r--fs/xfs/quota/xfs_dquot.h21
-rw-r--r--fs/xfs/quota/xfs_qm.c40
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/support/ktrace.c323
-rw-r--r--fs/xfs/support/ktrace.h85
-rw-r--r--fs/xfs/xfs.h16
-rw-r--r--fs/xfs/xfs_acl.h3
-rw-r--r--fs/xfs/xfs_ag.h14
-rw-r--r--fs/xfs/xfs_alloc.c270
-rw-r--r--fs/xfs/xfs_alloc.h27
-rw-r--r--fs/xfs/xfs_alloc_btree.c1
-rw-r--r--fs/xfs/xfs_attr.c107
-rw-r--r--fs/xfs/xfs_attr.h10
-rw-r--r--fs/xfs/xfs_attr_leaf.c14
-rw-r--r--fs/xfs/xfs_attr_sf.h40
-rw-r--r--fs/xfs/xfs_bmap.c942
-rw-r--r--fs/xfs/xfs_bmap.h58
-rw-r--r--fs/xfs/xfs_bmap_btree.c6
-rw-r--r--fs/xfs/xfs_bmap_btree.h14
-rw-r--r--fs/xfs/xfs_btree.c5
-rw-r--r--fs/xfs/xfs_btree_trace.h17
-rw-r--r--fs/xfs/xfs_buf_item.c87
-rw-r--r--fs/xfs/xfs_buf_item.h20
-rw-r--r--fs/xfs/xfs_da_btree.c3
-rw-r--r--fs/xfs/xfs_da_btree.h7
-rw-r--r--fs/xfs/xfs_dfrag.c108
-rw-r--r--fs/xfs/xfs_dir2.c8
-rw-r--r--fs/xfs/xfs_dir2_block.c20
-rw-r--r--fs/xfs/xfs_dir2_leaf.c21
-rw-r--r--fs/xfs/xfs_dir2_node.c27
-rw-r--r--fs/xfs/xfs_dir2_sf.c26
-rw-r--r--fs/xfs/xfs_dir2_trace.c216
-rw-r--r--fs/xfs/xfs_dir2_trace.h72
-rw-r--r--fs/xfs/xfs_filestream.c8
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_iget.c128
-rw-r--r--fs/xfs/xfs_inode.c96
-rw-r--r--fs/xfs/xfs_inode.h82
-rw-r--r--fs/xfs/xfs_inode_item.c5
-rw-r--r--fs/xfs/xfs_inode_item.h6
-rw-r--r--fs/xfs/xfs_iomap.c85
-rw-r--r--fs/xfs/xfs_iomap.h8
-rw-r--r--fs/xfs/xfs_log.c183
-rw-r--r--fs/xfs/xfs_log_priv.h20
-rw-r--r--fs/xfs/xfs_log_recover.c15
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_quota.h8
-rw-r--r--fs/xfs/xfs_rename.c1
-rw-r--r--fs/xfs/xfs_rtalloc.c3
-rw-r--r--fs/xfs/xfs_rw.c3
-rw-r--r--fs/xfs/xfs_trans.h47
-rw-r--r--fs/xfs/xfs_trans_buf.c62
-rw-r--r--fs/xfs/xfs_vnodeops.c101
324 files changed, 9969 insertions, 7531 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce9..9d03d1ebca6f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1001,44 +1001,6 @@ done:
1001} 1001}
1002 1002
1003/** 1003/**
1004 * v9fs_vfs_readlink - read a symlink's location
1005 * @dentry: dentry for symlink
1006 * @buffer: buffer to load symlink location into
1007 * @buflen: length of buffer
1008 *
1009 */
1010
1011static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1012 int buflen)
1013{
1014 int retval;
1015 int ret;
1016 char *link = __getname();
1017
1018 if (unlikely(!link))
1019 return -ENOMEM;
1020
1021 if (buflen > PATH_MAX)
1022 buflen = PATH_MAX;
1023
1024 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
1025 dentry);
1026
1027 retval = v9fs_readlink(dentry, link, buflen);
1028
1029 if (retval > 0) {
1030 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1031 P9_DPRINTK(P9_DEBUG_ERROR,
1032 "problem copying to user: %d\n", ret);
1033 retval = ret;
1034 }
1035 }
1036
1037 __putname(link);
1038 return retval;
1039}
1040
1041/**
1042 * v9fs_vfs_follow_link - follow a symlink path 1004 * v9fs_vfs_follow_link - follow a symlink path
1043 * @dentry: dentry for symlink 1005 * @dentry: dentry for symlink
1044 * @nd: nameidata 1006 * @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
1230 .rmdir = v9fs_vfs_rmdir, 1192 .rmdir = v9fs_vfs_rmdir,
1231 .mknod = v9fs_vfs_mknod, 1193 .mknod = v9fs_vfs_mknod,
1232 .rename = v9fs_vfs_rename, 1194 .rename = v9fs_vfs_rename,
1233 .readlink = v9fs_vfs_readlink,
1234 .getattr = v9fs_vfs_getattr, 1195 .getattr = v9fs_vfs_getattr,
1235 .setattr = v9fs_vfs_setattr, 1196 .setattr = v9fs_vfs_setattr,
1236}; 1197};
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
1253}; 1214};
1254 1215
1255static const struct inode_operations v9fs_symlink_inode_operations = { 1216static const struct inode_operations v9fs_symlink_inode_operations = {
1256 .readlink = v9fs_vfs_readlink, 1217 .readlink = generic_readlink,
1257 .follow_link = v9fs_vfs_follow_link, 1218 .follow_link = v9fs_vfs_follow_link,
1258 .put_link = v9fs_vfs_put_link, 1219 .put_link = v9fs_vfs_put_link,
1259 .getattr = v9fs_vfs_getattr, 1220 .getattr = v9fs_vfs_getattr,
diff --git a/fs/aio.c b/fs/aio.c
index c30dfc006108..1cf12b3dd83a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -711,10 +711,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
711 */ 711 */
712 ret = retry(iocb); 712 ret = retry(iocb);
713 713
714 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { 714 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
715 BUG_ON(!list_empty(&iocb->ki_wait.task_list));
716 aio_complete(iocb, ret, 0); 715 aio_complete(iocb, ret, 0);
717 }
718out: 716out:
719 spin_lock_irq(&ctx->ctx_lock); 717 spin_lock_irq(&ctx->ctx_lock);
720 718
@@ -866,13 +864,6 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
866 unsigned long flags; 864 unsigned long flags;
867 int run = 0; 865 int run = 0;
868 866
869 /* We're supposed to be the only path putting the iocb back on the run
870 * list. If we find that the iocb is *back* on a wait queue already
871 * than retry has happened before we could queue the iocb. This also
872 * means that the retry could have completed and freed our iocb, no
873 * good. */
874 BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
875
876 spin_lock_irqsave(&ctx->ctx_lock, flags); 867 spin_lock_irqsave(&ctx->ctx_lock, flags);
877 /* set this inside the lock so that we can't race with aio_run_iocb() 868 /* set this inside the lock so that we can't race with aio_run_iocb()
878 * testing it and putting the iocb on the run list under the lock */ 869 * testing it and putting the iocb on the run list under the lock */
@@ -886,7 +877,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
886/* 877/*
887 * kick_iocb: 878 * kick_iocb:
888 * Called typically from a wait queue callback context 879 * Called typically from a wait queue callback context
889 * (aio_wake_function) to trigger a retry of the iocb. 880 * to trigger a retry of the iocb.
890 * The retry is usually executed by aio workqueue 881 * The retry is usually executed by aio workqueue
891 * threads (See aio_kick_handler). 882 * threads (See aio_kick_handler).
892 */ 883 */
@@ -1520,31 +1511,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
1520 return 0; 1511 return 0;
1521} 1512}
1522 1513
1523/*
1524 * aio_wake_function:
1525 * wait queue callback function for aio notification,
1526 * Simply triggers a retry of the operation via kick_iocb.
1527 *
1528 * This callback is specified in the wait queue entry in
1529 * a kiocb.
1530 *
1531 * Note:
1532 * This routine is executed with the wait queue lock held.
1533 * Since kick_iocb acquires iocb->ctx->ctx_lock, it nests
1534 * the ioctx lock inside the wait queue lock. This is safe
1535 * because this callback isn't used for wait queues which
1536 * are nested inside ioctx lock (i.e. ctx->wait)
1537 */
1538static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1539 int sync, void *key)
1540{
1541 struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait);
1542
1543 list_del_init(&wait->task_list);
1544 kick_iocb(iocb);
1545 return 1;
1546}
1547
1548static void aio_batch_add(struct address_space *mapping, 1514static void aio_batch_add(struct address_space *mapping,
1549 struct hlist_head *batch_hash) 1515 struct hlist_head *batch_hash)
1550{ 1516{
@@ -1642,8 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1642 req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf; 1608 req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
1643 req->ki_left = req->ki_nbytes = iocb->aio_nbytes; 1609 req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
1644 req->ki_opcode = iocb->aio_lio_opcode; 1610 req->ki_opcode = iocb->aio_lio_opcode;
1645 init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
1646 INIT_LIST_HEAD(&req->ki_wait.task_list);
1647 1611
1648 ret = aio_setup_iocb(req); 1612 ret = aio_setup_iocb(req);
1649 1613
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2ca7a7cafdbf..9f0bf13291e5 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
35 mnt); 35 mnt);
36} 36}
37 37
38static int anon_inodefs_delete_dentry(struct dentry *dentry) 38/*
39 * anon_inodefs_dname() is called from d_path().
40 */
41static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
39{ 42{
40 /* 43 return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
41 * We faked vfs to believe the dentry was hashed when we created it. 44 dentry->d_name.name);
42 * Now we restore the flag so that dput() will work correctly.
43 */
44 dentry->d_flags |= DCACHE_UNHASHED;
45 return 1;
46} 45}
47 46
48static struct file_system_type anon_inode_fs_type = { 47static struct file_system_type anon_inode_fs_type = {
@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
51 .kill_sb = kill_anon_super, 50 .kill_sb = kill_anon_super,
52}; 51};
53static const struct dentry_operations anon_inodefs_dentry_operations = { 52static const struct dentry_operations anon_inodefs_dentry_operations = {
54 .d_delete = anon_inodefs_delete_dentry, 53 .d_dname = anon_inodefs_dname,
55}; 54};
56 55
57/* 56/*
@@ -88,7 +87,7 @@ struct file *anon_inode_getfile(const char *name,
88 void *priv, int flags) 87 void *priv, int flags)
89{ 88{
90 struct qstr this; 89 struct qstr this;
91 struct dentry *dentry; 90 struct path path;
92 struct file *file; 91 struct file *file;
93 int error; 92 int error;
94 93
@@ -106,10 +105,11 @@ struct file *anon_inode_getfile(const char *name,
106 this.name = name; 105 this.name = name;
107 this.len = strlen(name); 106 this.len = strlen(name);
108 this.hash = 0; 107 this.hash = 0;
109 dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); 108 path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
110 if (!dentry) 109 if (!path.dentry)
111 goto err_module; 110 goto err_module;
112 111
112 path.mnt = mntget(anon_inode_mnt);
113 /* 113 /*
114 * We know the anon_inode inode count is always greater than zero, 114 * We know the anon_inode inode count is always greater than zero,
115 * so we can avoid doing an igrab() and we can use an open-coded 115 * so we can avoid doing an igrab() and we can use an open-coded
@@ -117,27 +117,24 @@ struct file *anon_inode_getfile(const char *name,
117 */ 117 */
118 atomic_inc(&anon_inode_inode->i_count); 118 atomic_inc(&anon_inode_inode->i_count);
119 119
120 dentry->d_op = &anon_inodefs_dentry_operations; 120 path.dentry->d_op = &anon_inodefs_dentry_operations;
121 /* Do not publish this dentry inside the global dentry hash table */ 121 d_instantiate(path.dentry, anon_inode_inode);
122 dentry->d_flags &= ~DCACHE_UNHASHED;
123 d_instantiate(dentry, anon_inode_inode);
124 122
125 error = -ENFILE; 123 error = -ENFILE;
126 file = alloc_file(anon_inode_mnt, dentry, 124 file = alloc_file(&path, OPEN_FMODE(flags), fops);
127 FMODE_READ | FMODE_WRITE, fops);
128 if (!file) 125 if (!file)
129 goto err_dput; 126 goto err_dput;
130 file->f_mapping = anon_inode_inode->i_mapping; 127 file->f_mapping = anon_inode_inode->i_mapping;
131 128
132 file->f_pos = 0; 129 file->f_pos = 0;
133 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 130 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
134 file->f_version = 0; 131 file->f_version = 0;
135 file->private_data = priv; 132 file->private_data = priv;
136 133
137 return file; 134 return file;
138 135
139err_dput: 136err_dput:
140 dput(dentry); 137 path_put(&path);
141err_module: 138err_module:
142 module_put(fops->owner); 139 module_put(fops->owner);
143 return ERR_PTR(error); 140 return ERR_PTR(error);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 8f7cdde41733..0118d67221b2 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -60,6 +60,11 @@ do { \
60 current->pid, __func__, ##args); \ 60 current->pid, __func__, ##args); \
61} while (0) 61} while (0)
62 62
63struct rehash_entry {
64 struct task_struct *task;
65 struct list_head list;
66};
67
63/* Unified info structure. This is pointed to by both the dentry and 68/* Unified info structure. This is pointed to by both the dentry and
64 inode structures. Each file in the filesystem has an instance of this 69 inode structures. Each file in the filesystem has an instance of this
65 structure. It holds a reference to the dentry, so dentries are never 70 structure. It holds a reference to the dentry, so dentries are never
@@ -75,6 +80,9 @@ struct autofs_info {
75 struct completion expire_complete; 80 struct completion expire_complete;
76 81
77 struct list_head active; 82 struct list_head active;
83 int active_count;
84 struct list_head rehash_list;
85
78 struct list_head expiring; 86 struct list_head expiring;
79 87
80 struct autofs_sb_info *sbi; 88 struct autofs_sb_info *sbi;
@@ -95,6 +103,8 @@ struct autofs_info {
95 103
96#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 104#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
97#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ 105#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
106#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
107#define AUTOFS_INF_REHASH (1<<3) /* dentry in transit to ->lookup() */
98 108
99struct autofs_wait_queue { 109struct autofs_wait_queue {
100 wait_queue_head_t queue; 110 wait_queue_head_t queue;
@@ -161,7 +171,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
161{ 171{
162 struct autofs_info *inf = autofs4_dentry_ino(dentry); 172 struct autofs_info *inf = autofs4_dentry_ino(dentry);
163 173
164 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 174 if (inf->flags & AUTOFS_INF_PENDING)
165 return 1; 175 return 1;
166 176
167 if (inf->flags & AUTOFS_INF_EXPIRING) 177 if (inf->flags & AUTOFS_INF_EXPIRING)
@@ -264,5 +274,31 @@ out:
264 return ret; 274 return ret;
265} 275}
266 276
277static inline void autofs4_add_expiring(struct dentry *dentry)
278{
279 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
280 struct autofs_info *ino = autofs4_dentry_ino(dentry);
281 if (ino) {
282 spin_lock(&sbi->lookup_lock);
283 if (list_empty(&ino->expiring))
284 list_add(&ino->expiring, &sbi->expiring_list);
285 spin_unlock(&sbi->lookup_lock);
286 }
287 return;
288}
289
290static inline void autofs4_del_expiring(struct dentry *dentry)
291{
292 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
293 struct autofs_info *ino = autofs4_dentry_ino(dentry);
294 if (ino) {
295 spin_lock(&sbi->lookup_lock);
296 if (!list_empty(&ino->expiring))
297 list_del_init(&ino->expiring);
298 spin_unlock(&sbi->lookup_lock);
299 }
300 return;
301}
302
267void autofs4_dentry_release(struct dentry *); 303void autofs4_dentry_release(struct dentry *);
268extern void autofs4_kill_sb(struct super_block *); 304extern void autofs4_kill_sb(struct super_block *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3da18d453488..74bc9aa6df31 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -27,7 +27,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
27 return 0; 27 return 0;
28 28
29 /* No point expiring a pending mount */ 29 /* No point expiring a pending mount */
30 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 30 if (ino->flags & AUTOFS_INF_PENDING)
31 return 0; 31 return 0;
32 32
33 if (!do_now) { 33 if (!do_now) {
@@ -279,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
279 root->d_mounted--; 279 root->d_mounted--;
280 } 280 }
281 ino->flags |= AUTOFS_INF_EXPIRING; 281 ino->flags |= AUTOFS_INF_EXPIRING;
282 autofs4_add_expiring(root);
282 init_completion(&ino->expire_complete); 283 init_completion(&ino->expire_complete);
283 spin_unlock(&sbi->fs_lock); 284 spin_unlock(&sbi->fs_lock);
284 return root; 285 return root;
@@ -406,6 +407,7 @@ found:
406 expired, (int)expired->d_name.len, expired->d_name.name); 407 expired, (int)expired->d_name.len, expired->d_name.name);
407 ino = autofs4_dentry_ino(expired); 408 ino = autofs4_dentry_ino(expired);
408 ino->flags |= AUTOFS_INF_EXPIRING; 409 ino->flags |= AUTOFS_INF_EXPIRING;
410 autofs4_add_expiring(expired);
409 init_completion(&ino->expire_complete); 411 init_completion(&ino->expire_complete);
410 spin_unlock(&sbi->fs_lock); 412 spin_unlock(&sbi->fs_lock);
411 spin_lock(&dcache_lock); 413 spin_lock(&dcache_lock);
@@ -433,7 +435,7 @@ int autofs4_expire_wait(struct dentry *dentry)
433 435
434 DPRINTK("expire done status=%d", status); 436 DPRINTK("expire done status=%d", status);
435 437
436 if (d_unhashed(dentry)) 438 if (d_unhashed(dentry) && IS_DEADDIR(dentry->d_inode))
437 return -EAGAIN; 439 return -EAGAIN;
438 440
439 return status; 441 return status;
@@ -473,6 +475,7 @@ int autofs4_expire_run(struct super_block *sb,
473 spin_lock(&sbi->fs_lock); 475 spin_lock(&sbi->fs_lock);
474 ino = autofs4_dentry_ino(dentry); 476 ino = autofs4_dentry_ino(dentry);
475 ino->flags &= ~AUTOFS_INF_EXPIRING; 477 ino->flags &= ~AUTOFS_INF_EXPIRING;
478 autofs4_del_expiring(dentry);
476 complete_all(&ino->expire_complete); 479 complete_all(&ino->expire_complete);
477 spin_unlock(&sbi->fs_lock); 480 spin_unlock(&sbi->fs_lock);
478 481
@@ -503,6 +506,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
503 ino->flags &= ~AUTOFS_INF_MOUNTPOINT; 506 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
504 } 507 }
505 ino->flags &= ~AUTOFS_INF_EXPIRING; 508 ino->flags &= ~AUTOFS_INF_EXPIRING;
509 autofs4_del_expiring(dentry);
506 complete_all(&ino->expire_complete); 510 complete_all(&ino->expire_complete);
507 spin_unlock(&sbi->fs_lock); 511 spin_unlock(&sbi->fs_lock);
508 dput(dentry); 512 dput(dentry);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 69c8142da838..d0a3de247458 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -49,6 +49,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
49 ino->dentry = NULL; 49 ino->dentry = NULL;
50 ino->size = 0; 50 ino->size = 0;
51 INIT_LIST_HEAD(&ino->active); 51 INIT_LIST_HEAD(&ino->active);
52 INIT_LIST_HEAD(&ino->rehash_list);
53 ino->active_count = 0;
52 INIT_LIST_HEAD(&ino->expiring); 54 INIT_LIST_HEAD(&ino->expiring);
53 atomic_set(&ino->count, 0); 55 atomic_set(&ino->count, 0);
54 } 56 }
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index b96a3c57359d..30cc9ddf4b70 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -72,6 +72,139 @@ const struct inode_operations autofs4_dir_inode_operations = {
72 .rmdir = autofs4_dir_rmdir, 72 .rmdir = autofs4_dir_rmdir,
73}; 73};
74 74
75static void autofs4_add_active(struct dentry *dentry)
76{
77 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
78 struct autofs_info *ino = autofs4_dentry_ino(dentry);
79 if (ino) {
80 spin_lock(&sbi->lookup_lock);
81 if (!ino->active_count) {
82 if (list_empty(&ino->active))
83 list_add(&ino->active, &sbi->active_list);
84 }
85 ino->active_count++;
86 spin_unlock(&sbi->lookup_lock);
87 }
88 return;
89}
90
91static void autofs4_del_active(struct dentry *dentry)
92{
93 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
94 struct autofs_info *ino = autofs4_dentry_ino(dentry);
95 if (ino) {
96 spin_lock(&sbi->lookup_lock);
97 ino->active_count--;
98 if (!ino->active_count) {
99 if (!list_empty(&ino->active))
100 list_del_init(&ino->active);
101 }
102 spin_unlock(&sbi->lookup_lock);
103 }
104 return;
105}
106
107static void autofs4_add_rehash_entry(struct autofs_info *ino,
108 struct rehash_entry *entry)
109{
110 entry->task = current;
111 INIT_LIST_HEAD(&entry->list);
112 list_add(&entry->list, &ino->rehash_list);
113 return;
114}
115
116static void autofs4_remove_rehash_entry(struct autofs_info *ino)
117{
118 struct list_head *head = &ino->rehash_list;
119 struct rehash_entry *entry;
120 list_for_each_entry(entry, head, list) {
121 if (entry->task == current) {
122 list_del(&entry->list);
123 kfree(entry);
124 break;
125 }
126 }
127 return;
128}
129
130static void autofs4_remove_rehash_entrys(struct autofs_info *ino)
131{
132 struct autofs_sb_info *sbi = ino->sbi;
133 struct rehash_entry *entry, *next;
134 struct list_head *head;
135
136 spin_lock(&sbi->fs_lock);
137 spin_lock(&sbi->lookup_lock);
138 if (!(ino->flags & AUTOFS_INF_REHASH)) {
139 spin_unlock(&sbi->lookup_lock);
140 spin_unlock(&sbi->fs_lock);
141 return;
142 }
143 ino->flags &= ~AUTOFS_INF_REHASH;
144 head = &ino->rehash_list;
145 list_for_each_entry_safe(entry, next, head, list) {
146 list_del(&entry->list);
147 kfree(entry);
148 }
149 spin_unlock(&sbi->lookup_lock);
150 spin_unlock(&sbi->fs_lock);
151 dput(ino->dentry);
152
153 return;
154}
155
156static void autofs4_revalidate_drop(struct dentry *dentry,
157 struct rehash_entry *entry)
158{
159 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
160 struct autofs_info *ino = autofs4_dentry_ino(dentry);
161 /*
162 * Add to the active list so we can pick this up in
163 * ->lookup(). Also add an entry to a rehash list so
164 * we know when there are no dentrys in flight so we
165 * know when we can rehash the dentry.
166 */
167 spin_lock(&sbi->lookup_lock);
168 if (list_empty(&ino->active))
169 list_add(&ino->active, &sbi->active_list);
170 autofs4_add_rehash_entry(ino, entry);
171 spin_unlock(&sbi->lookup_lock);
172 if (!(ino->flags & AUTOFS_INF_REHASH)) {
173 ino->flags |= AUTOFS_INF_REHASH;
174 dget(dentry);
175 spin_lock(&dentry->d_lock);
176 __d_drop(dentry);
177 spin_unlock(&dentry->d_lock);
178 }
179 return;
180}
181
182static void autofs4_revalidate_rehash(struct dentry *dentry)
183{
184 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
185 struct autofs_info *ino = autofs4_dentry_ino(dentry);
186 if (ino->flags & AUTOFS_INF_REHASH) {
187 spin_lock(&sbi->lookup_lock);
188 autofs4_remove_rehash_entry(ino);
189 if (list_empty(&ino->rehash_list)) {
190 spin_unlock(&sbi->lookup_lock);
191 ino->flags &= ~AUTOFS_INF_REHASH;
192 d_rehash(dentry);
193 dput(ino->dentry);
194 } else
195 spin_unlock(&sbi->lookup_lock);
196 }
197 return;
198}
199
200static unsigned int autofs4_need_mount(unsigned int flags)
201{
202 unsigned int res = 0;
203 if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS))
204 res = 1;
205 return res;
206}
207
75static int autofs4_dir_open(struct inode *inode, struct file *file) 208static int autofs4_dir_open(struct inode *inode, struct file *file)
76{ 209{
77 struct dentry *dentry = file->f_path.dentry; 210 struct dentry *dentry = file->f_path.dentry;
@@ -93,7 +226,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
93 * it. 226 * it.
94 */ 227 */
95 spin_lock(&dcache_lock); 228 spin_lock(&dcache_lock);
96 if (!d_mountpoint(dentry) && __simple_empty(dentry)) { 229 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
97 spin_unlock(&dcache_lock); 230 spin_unlock(&dcache_lock);
98 return -ENOENT; 231 return -ENOENT;
99 } 232 }
@@ -103,7 +236,7 @@ out:
103 return dcache_dir_open(inode, file); 236 return dcache_dir_open(inode, file);
104} 237}
105 238
106static int try_to_fill_dentry(struct dentry *dentry, int flags) 239static int try_to_fill_dentry(struct dentry *dentry)
107{ 240{
108 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 241 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
109 struct autofs_info *ino = autofs4_dentry_ino(dentry); 242 struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -116,55 +249,17 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
116 * Wait for a pending mount, triggering one if there 249 * Wait for a pending mount, triggering one if there
117 * isn't one already 250 * isn't one already
118 */ 251 */
119 if (dentry->d_inode == NULL) { 252 DPRINTK("waiting for mount name=%.*s",
120 DPRINTK("waiting for mount name=%.*s", 253 dentry->d_name.len, dentry->d_name.name);
121 dentry->d_name.len, dentry->d_name.name);
122
123 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
124
125 DPRINTK("mount done status=%d", status);
126
127 /* Turn this into a real negative dentry? */
128 if (status == -ENOENT) {
129 spin_lock(&dentry->d_lock);
130 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
131 spin_unlock(&dentry->d_lock);
132 return status;
133 } else if (status) {
134 /* Return a negative dentry, but leave it "pending" */
135 return status;
136 }
137 /* Trigger mount for path component or follow link */
138 } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
139 flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
140 current->link_count) {
141 DPRINTK("waiting for mount name=%.*s",
142 dentry->d_name.len, dentry->d_name.name);
143
144 spin_lock(&dentry->d_lock);
145 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
146 spin_unlock(&dentry->d_lock);
147 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
148 254
149 DPRINTK("mount done status=%d", status); 255 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
150 256
151 if (status) { 257 DPRINTK("mount done status=%d", status);
152 spin_lock(&dentry->d_lock);
153 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
154 spin_unlock(&dentry->d_lock);
155 return status;
156 }
157 }
158
159 /* Initialize expiry counter after successful mount */
160 if (ino)
161 ino->last_used = jiffies;
162 258
163 spin_lock(&dentry->d_lock); 259 /* Update expiry counter */
164 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 260 ino->last_used = jiffies;
165 spin_unlock(&dentry->d_lock);
166 261
167 return 0; 262 return status;
168} 263}
169 264
170/* For autofs direct mounts the follow link triggers the mount */ 265/* For autofs direct mounts the follow link triggers the mount */
@@ -202,27 +297,39 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
202 autofs4_expire_wait(dentry); 297 autofs4_expire_wait(dentry);
203 298
204 /* We trigger a mount for almost all flags */ 299 /* We trigger a mount for almost all flags */
205 lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS); 300 lookup_type = autofs4_need_mount(nd->flags);
206 if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING)) 301 spin_lock(&sbi->fs_lock);
302 spin_lock(&dcache_lock);
303 if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
304 spin_unlock(&dcache_lock);
305 spin_unlock(&sbi->fs_lock);
207 goto follow; 306 goto follow;
307 }
208 308
209 /* 309 /*
210 * If the dentry contains directories then it is an autofs 310 * If the dentry contains directories then it is an autofs
211 * multi-mount with no root mount offset. So don't try to 311 * multi-mount with no root mount offset. So don't try to
212 * mount it again. 312 * mount it again.
213 */ 313 */
214 spin_lock(&dcache_lock); 314 if (ino->flags & AUTOFS_INF_PENDING ||
215 if (dentry->d_flags & DCACHE_AUTOFS_PENDING || 315 (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
216 (!d_mountpoint(dentry) && __simple_empty(dentry))) { 316 ino->flags |= AUTOFS_INF_PENDING;
217 spin_unlock(&dcache_lock); 317 spin_unlock(&dcache_lock);
318 spin_unlock(&sbi->fs_lock);
319
320 status = try_to_fill_dentry(dentry);
321
322 spin_lock(&sbi->fs_lock);
323 ino->flags &= ~AUTOFS_INF_PENDING;
324 spin_unlock(&sbi->fs_lock);
218 325
219 status = try_to_fill_dentry(dentry, 0);
220 if (status) 326 if (status)
221 goto out_error; 327 goto out_error;
222 328
223 goto follow; 329 goto follow;
224 } 330 }
225 spin_unlock(&dcache_lock); 331 spin_unlock(&dcache_lock);
332 spin_unlock(&sbi->fs_lock);
226follow: 333follow:
227 /* 334 /*
228 * If there is no root mount it must be an autofs 335 * If there is no root mount it must be an autofs
@@ -254,18 +361,47 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
254{ 361{
255 struct inode *dir = dentry->d_parent->d_inode; 362 struct inode *dir = dentry->d_parent->d_inode;
256 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 363 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
257 int oz_mode = autofs4_oz_mode(sbi); 364 struct autofs_info *ino = autofs4_dentry_ino(dentry);
365 struct rehash_entry *entry;
258 int flags = nd ? nd->flags : 0; 366 int flags = nd ? nd->flags : 0;
259 int status = 1; 367 unsigned int mutex_aquired;
368
369 DPRINTK("name = %.*s oz_mode = %d",
370 dentry->d_name.len, dentry->d_name.name, oz_mode);
371
372 /* Daemon never causes a mount to trigger */
373 if (autofs4_oz_mode(sbi))
374 return 1;
375
376 entry = kmalloc(sizeof(struct rehash_entry), GFP_KERNEL);
377 if (!entry)
378 return -ENOMEM;
379
380 mutex_aquired = mutex_trylock(&dir->i_mutex);
260 381
261 /* Pending dentry */
262 spin_lock(&sbi->fs_lock); 382 spin_lock(&sbi->fs_lock);
383 spin_lock(&dcache_lock);
384 /* Pending dentry */
263 if (autofs4_ispending(dentry)) { 385 if (autofs4_ispending(dentry)) {
264 /* The daemon never causes a mount to trigger */ 386 int status;
265 spin_unlock(&sbi->fs_lock);
266 387
267 if (oz_mode) 388 /*
268 return 1; 389 * We can only unhash and send this to ->lookup() if
390 * the directory mutex is held over d_revalidate() and
391 * ->lookup(). This prevents the VFS from incorrectly
392 * seeing the dentry as non-existent.
393 */
394 ino->flags |= AUTOFS_INF_PENDING;
395 if (!mutex_aquired) {
396 autofs4_revalidate_drop(dentry, entry);
397 spin_unlock(&dcache_lock);
398 spin_unlock(&sbi->fs_lock);
399 return 0;
400 }
401 spin_unlock(&dcache_lock);
402 spin_unlock(&sbi->fs_lock);
403 mutex_unlock(&dir->i_mutex);
404 kfree(entry);
269 405
270 /* 406 /*
271 * If the directory has gone away due to an expire 407 * If the directory has gone away due to an expire
@@ -279,46 +415,82 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
279 * A zero status is success otherwise we have a 415 * A zero status is success otherwise we have a
280 * negative error code. 416 * negative error code.
281 */ 417 */
282 status = try_to_fill_dentry(dentry, flags); 418 status = try_to_fill_dentry(dentry);
419
420 spin_lock(&sbi->fs_lock);
421 ino->flags &= ~AUTOFS_INF_PENDING;
422 spin_unlock(&sbi->fs_lock);
423
283 if (status == 0) 424 if (status == 0)
284 return 1; 425 return 1;
285 426
286 return status; 427 return status;
287 } 428 }
288 spin_unlock(&sbi->fs_lock);
289
290 /* Negative dentry.. invalidate if "old" */
291 if (dentry->d_inode == NULL)
292 return 0;
293 429
294 /* Check for a non-mountpoint directory with no contents */ 430 /* Check for a non-mountpoint directory with no contents */
295 spin_lock(&dcache_lock);
296 if (S_ISDIR(dentry->d_inode->i_mode) && 431 if (S_ISDIR(dentry->d_inode->i_mode) &&
297 !d_mountpoint(dentry) && 432 !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
298 __simple_empty(dentry)) {
299 DPRINTK("dentry=%p %.*s, emptydir", 433 DPRINTK("dentry=%p %.*s, emptydir",
300 dentry, dentry->d_name.len, dentry->d_name.name); 434 dentry, dentry->d_name.len, dentry->d_name.name);
301 spin_unlock(&dcache_lock);
302 435
303 /* The daemon never causes a mount to trigger */ 436 if (autofs4_need_mount(flags) || current->link_count) {
304 if (oz_mode) 437 int status;
305 return 1;
306 438
307 /* 439 /*
308 * A zero status is success otherwise we have a 440 * We can only unhash and send this to ->lookup() if
309 * negative error code. 441 * the directory mutex is held over d_revalidate() and
310 */ 442 * ->lookup(). This prevents the VFS from incorrectly
311 status = try_to_fill_dentry(dentry, flags); 443 * seeing the dentry as non-existent.
312 if (status == 0) 444 */
313 return 1; 445 ino->flags |= AUTOFS_INF_PENDING;
446 if (!mutex_aquired) {
447 autofs4_revalidate_drop(dentry, entry);
448 spin_unlock(&dcache_lock);
449 spin_unlock(&sbi->fs_lock);
450 return 0;
451 }
452 spin_unlock(&dcache_lock);
453 spin_unlock(&sbi->fs_lock);
454 mutex_unlock(&dir->i_mutex);
455 kfree(entry);
314 456
315 return status; 457 /*
458 * A zero status is success otherwise we have a
459 * negative error code.
460 */
461 status = try_to_fill_dentry(dentry);
462
463 spin_lock(&sbi->fs_lock);
464 ino->flags &= ~AUTOFS_INF_PENDING;
465 spin_unlock(&sbi->fs_lock);
466
467 if (status == 0)
468 return 1;
469
470 return status;
471 }
316 } 472 }
317 spin_unlock(&dcache_lock); 473 spin_unlock(&dcache_lock);
474 spin_unlock(&sbi->fs_lock);
475
476 if (mutex_aquired)
477 mutex_unlock(&dir->i_mutex);
478
479 kfree(entry);
318 480
319 return 1; 481 return 1;
320} 482}
321 483
484static void autofs4_free_rehash_entrys(struct autofs_info *inf)
485{
486 struct list_head *head = &inf->rehash_list;
487 struct rehash_entry *entry, *next;
488 list_for_each_entry_safe(entry, next, head, list) {
489 list_del(&entry->list);
490 kfree(entry);
491 }
492}
493
322void autofs4_dentry_release(struct dentry *de) 494void autofs4_dentry_release(struct dentry *de)
323{ 495{
324 struct autofs_info *inf; 496 struct autofs_info *inf;
@@ -337,6 +509,8 @@ void autofs4_dentry_release(struct dentry *de)
337 list_del(&inf->active); 509 list_del(&inf->active);
338 if (!list_empty(&inf->expiring)) 510 if (!list_empty(&inf->expiring))
339 list_del(&inf->expiring); 511 list_del(&inf->expiring);
512 if (!list_empty(&inf->rehash_list))
513 autofs4_free_rehash_entrys(inf);
340 spin_unlock(&sbi->lookup_lock); 514 spin_unlock(&sbi->lookup_lock);
341 } 515 }
342 516
@@ -359,35 +533,52 @@ static const struct dentry_operations autofs4_dentry_operations = {
359 .d_release = autofs4_dentry_release, 533 .d_release = autofs4_dentry_release,
360}; 534};
361 535
362static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 536static struct dentry *autofs4_lookup_active(struct dentry *dentry)
363{ 537{
538 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
539 struct dentry *parent = dentry->d_parent;
540 struct qstr *name = &dentry->d_name;
364 unsigned int len = name->len; 541 unsigned int len = name->len;
365 unsigned int hash = name->hash; 542 unsigned int hash = name->hash;
366 const unsigned char *str = name->name; 543 const unsigned char *str = name->name;
367 struct list_head *p, *head; 544 struct list_head *p, *head;
368 545
546restart:
369 spin_lock(&dcache_lock); 547 spin_lock(&dcache_lock);
370 spin_lock(&sbi->lookup_lock); 548 spin_lock(&sbi->lookup_lock);
371 head = &sbi->active_list; 549 head = &sbi->active_list;
372 list_for_each(p, head) { 550 list_for_each(p, head) {
373 struct autofs_info *ino; 551 struct autofs_info *ino;
374 struct dentry *dentry; 552 struct dentry *active;
375 struct qstr *qstr; 553 struct qstr *qstr;
376 554
377 ino = list_entry(p, struct autofs_info, active); 555 ino = list_entry(p, struct autofs_info, active);
378 dentry = ino->dentry; 556 active = ino->dentry;
379 557
380 spin_lock(&dentry->d_lock); 558 spin_lock(&active->d_lock);
381 559
382 /* Already gone? */ 560 /* Already gone? */
383 if (atomic_read(&dentry->d_count) == 0) 561 if (atomic_read(&active->d_count) == 0)
384 goto next; 562 goto next;
385 563
386 qstr = &dentry->d_name; 564 if (active->d_inode && IS_DEADDIR(active->d_inode)) {
565 if (!list_empty(&ino->rehash_list)) {
566 dget(active);
567 spin_unlock(&active->d_lock);
568 spin_unlock(&sbi->lookup_lock);
569 spin_unlock(&dcache_lock);
570 autofs4_remove_rehash_entrys(ino);
571 dput(active);
572 goto restart;
573 }
574 goto next;
575 }
576
577 qstr = &active->d_name;
387 578
388 if (dentry->d_name.hash != hash) 579 if (active->d_name.hash != hash)
389 goto next; 580 goto next;
390 if (dentry->d_parent != parent) 581 if (active->d_parent != parent)
391 goto next; 582 goto next;
392 583
393 if (qstr->len != len) 584 if (qstr->len != len)
@@ -395,15 +586,13 @@ static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct d
395 if (memcmp(qstr->name, str, len)) 586 if (memcmp(qstr->name, str, len))
396 goto next; 587 goto next;
397 588
398 if (d_unhashed(dentry)) { 589 dget(active);
399 dget(dentry); 590 spin_unlock(&active->d_lock);
400 spin_unlock(&dentry->d_lock); 591 spin_unlock(&sbi->lookup_lock);
401 spin_unlock(&sbi->lookup_lock); 592 spin_unlock(&dcache_lock);
402 spin_unlock(&dcache_lock); 593 return active;
403 return dentry;
404 }
405next: 594next:
406 spin_unlock(&dentry->d_lock); 595 spin_unlock(&active->d_lock);
407 } 596 }
408 spin_unlock(&sbi->lookup_lock); 597 spin_unlock(&sbi->lookup_lock);
409 spin_unlock(&dcache_lock); 598 spin_unlock(&dcache_lock);
@@ -411,8 +600,11 @@ next:
411 return NULL; 600 return NULL;
412} 601}
413 602
414static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 603static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
415{ 604{
605 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
606 struct dentry *parent = dentry->d_parent;
607 struct qstr *name = &dentry->d_name;
416 unsigned int len = name->len; 608 unsigned int len = name->len;
417 unsigned int hash = name->hash; 609 unsigned int hash = name->hash;
418 const unsigned char *str = name->name; 610 const unsigned char *str = name->name;
@@ -423,23 +615,23 @@ static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct
423 head = &sbi->expiring_list; 615 head = &sbi->expiring_list;
424 list_for_each(p, head) { 616 list_for_each(p, head) {
425 struct autofs_info *ino; 617 struct autofs_info *ino;
426 struct dentry *dentry; 618 struct dentry *expiring;
427 struct qstr *qstr; 619 struct qstr *qstr;
428 620
429 ino = list_entry(p, struct autofs_info, expiring); 621 ino = list_entry(p, struct autofs_info, expiring);
430 dentry = ino->dentry; 622 expiring = ino->dentry;
431 623
432 spin_lock(&dentry->d_lock); 624 spin_lock(&expiring->d_lock);
433 625
434 /* Bad luck, we've already been dentry_iput */ 626 /* Bad luck, we've already been dentry_iput */
435 if (!dentry->d_inode) 627 if (!expiring->d_inode)
436 goto next; 628 goto next;
437 629
438 qstr = &dentry->d_name; 630 qstr = &expiring->d_name;
439 631
440 if (dentry->d_name.hash != hash) 632 if (expiring->d_name.hash != hash)
441 goto next; 633 goto next;
442 if (dentry->d_parent != parent) 634 if (expiring->d_parent != parent)
443 goto next; 635 goto next;
444 636
445 if (qstr->len != len) 637 if (qstr->len != len)
@@ -447,15 +639,13 @@ static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct
447 if (memcmp(qstr->name, str, len)) 639 if (memcmp(qstr->name, str, len))
448 goto next; 640 goto next;
449 641
450 if (d_unhashed(dentry)) { 642 dget(expiring);
451 dget(dentry); 643 spin_unlock(&expiring->d_lock);
452 spin_unlock(&dentry->d_lock); 644 spin_unlock(&sbi->lookup_lock);
453 spin_unlock(&sbi->lookup_lock); 645 spin_unlock(&dcache_lock);
454 spin_unlock(&dcache_lock); 646 return expiring;
455 return dentry;
456 }
457next: 647next:
458 spin_unlock(&dentry->d_lock); 648 spin_unlock(&expiring->d_lock);
459 } 649 }
460 spin_unlock(&sbi->lookup_lock); 650 spin_unlock(&sbi->lookup_lock);
461 spin_unlock(&dcache_lock); 651 spin_unlock(&dcache_lock);
@@ -463,13 +653,56 @@ next:
463 return NULL; 653 return NULL;
464} 654}
465 655
656static struct autofs_info *init_new_dentry(struct autofs_sb_info *sbi,
657 struct dentry *dentry, int oz_mode)
658{
659 struct autofs_info *ino;
660
661 /*
662 * Mark the dentry incomplete but don't hash it. We do this
663 * to serialize our inode creation operations (symlink and
664 * mkdir) which prevents deadlock during the callback to
665 * the daemon. Subsequent user space lookups for the same
666 * dentry are placed on the wait queue while the daemon
667 * itself is allowed passage unresticted so the create
668 * operation itself can then hash the dentry. Finally,
669 * we check for the hashed dentry and return the newly
670 * hashed dentry.
671 */
672 dentry->d_op = &autofs4_root_dentry_operations;
673
674 /*
675 * And we need to ensure that the same dentry is used for
676 * all following lookup calls until it is hashed so that
677 * the dentry flags are persistent throughout the request.
678 */
679 ino = autofs4_init_ino(NULL, sbi, 0555);
680 if (!ino)
681 return ERR_PTR(-ENOMEM);
682
683 dentry->d_fsdata = ino;
684 ino->dentry = dentry;
685
686 /*
687 * Only set the mount pending flag for new dentrys not created
688 * by the daemon.
689 */
690 if (!oz_mode)
691 ino->flags |= AUTOFS_INF_PENDING;
692
693 d_instantiate(dentry, NULL);
694
695 return ino;
696}
697
466/* Lookups in the root directory */ 698/* Lookups in the root directory */
467static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 699static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
468{ 700{
469 struct autofs_sb_info *sbi; 701 struct autofs_sb_info *sbi;
470 struct autofs_info *ino; 702 struct autofs_info *ino;
471 struct dentry *expiring, *unhashed; 703 struct dentry *expiring, *active;
472 int oz_mode; 704 int oz_mode;
705 int status = 0;
473 706
474 DPRINTK("name = %.*s", 707 DPRINTK("name = %.*s",
475 dentry->d_name.len, dentry->d_name.name); 708 dentry->d_name.len, dentry->d_name.name);
@@ -484,123 +717,100 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
484 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 717 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
485 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 718 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
486 719
487 unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name); 720 spin_lock(&sbi->fs_lock);
488 if (unhashed) 721 active = autofs4_lookup_active(dentry);
489 dentry = unhashed; 722 if (active) {
490 else { 723 dentry = active;
491 /* 724 ino = autofs4_dentry_ino(dentry);
492 * Mark the dentry incomplete but don't hash it. We do this 725 /* If this came from revalidate, rehash it */
493 * to serialize our inode creation operations (symlink and 726 autofs4_revalidate_rehash(dentry);
494 * mkdir) which prevents deadlock during the callback to 727 spin_unlock(&sbi->fs_lock);
495 * the daemon. Subsequent user space lookups for the same 728 } else {
496 * dentry are placed on the wait queue while the daemon 729 spin_unlock(&sbi->fs_lock);
497 * itself is allowed passage unresticted so the create 730 ino = init_new_dentry(sbi, dentry, oz_mode);
498 * operation itself can then hash the dentry. Finally, 731 if (IS_ERR(ino))
499 * we check for the hashed dentry and return the newly 732 return (struct dentry *) ino;
500 * hashed dentry.
501 */
502 dentry->d_op = &autofs4_root_dentry_operations;
503
504 /*
505 * And we need to ensure that the same dentry is used for
506 * all following lookup calls until it is hashed so that
507 * the dentry flags are persistent throughout the request.
508 */
509 ino = autofs4_init_ino(NULL, sbi, 0555);
510 if (!ino)
511 return ERR_PTR(-ENOMEM);
512
513 dentry->d_fsdata = ino;
514 ino->dentry = dentry;
515
516 spin_lock(&sbi->lookup_lock);
517 list_add(&ino->active, &sbi->active_list);
518 spin_unlock(&sbi->lookup_lock);
519
520 d_instantiate(dentry, NULL);
521 } 733 }
522 734
735 autofs4_add_active(dentry);
736
523 if (!oz_mode) { 737 if (!oz_mode) {
738 expiring = autofs4_lookup_expiring(dentry);
524 mutex_unlock(&dir->i_mutex); 739 mutex_unlock(&dir->i_mutex);
525 expiring = autofs4_lookup_expiring(sbi,
526 dentry->d_parent,
527 &dentry->d_name);
528 if (expiring) { 740 if (expiring) {
529 /* 741 /*
530 * If we are racing with expire the request might not 742 * If we are racing with expire the request might not
531 * be quite complete but the directory has been removed 743 * be quite complete but the directory has been removed
532 * so it must have been successful, so just wait for it. 744 * so it must have been successful, so just wait for it.
533 */ 745 */
534 ino = autofs4_dentry_ino(expiring);
535 autofs4_expire_wait(expiring); 746 autofs4_expire_wait(expiring);
536 spin_lock(&sbi->lookup_lock);
537 if (!list_empty(&ino->expiring))
538 list_del_init(&ino->expiring);
539 spin_unlock(&sbi->lookup_lock);
540 dput(expiring); 747 dput(expiring);
541 } 748 }
542 749 status = try_to_fill_dentry(dentry);
543 spin_lock(&dentry->d_lock);
544 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
545 spin_unlock(&dentry->d_lock);
546 if (dentry->d_op && dentry->d_op->d_revalidate)
547 (dentry->d_op->d_revalidate)(dentry, nd);
548 mutex_lock(&dir->i_mutex); 750 mutex_lock(&dir->i_mutex);
751 spin_lock(&sbi->fs_lock);
752 ino->flags &= ~AUTOFS_INF_PENDING;
753 spin_unlock(&sbi->fs_lock);
549 } 754 }
550 755
756 autofs4_del_active(dentry);
757
551 /* 758 /*
552 * If we are still pending, check if we had to handle 759 * If we had a mount fail, check if we had to handle
553 * a signal. If so we can force a restart.. 760 * a signal. If so we can force a restart..
554 */ 761 */
555 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) { 762 if (status) {
556 /* See if we were interrupted */ 763 /* See if we were interrupted */
557 if (signal_pending(current)) { 764 if (signal_pending(current)) {
558 sigset_t *sigset = &current->pending.signal; 765 sigset_t *sigset = &current->pending.signal;
559 if (sigismember (sigset, SIGKILL) || 766 if (sigismember (sigset, SIGKILL) ||
560 sigismember (sigset, SIGQUIT) || 767 sigismember (sigset, SIGQUIT) ||
561 sigismember (sigset, SIGINT)) { 768 sigismember (sigset, SIGINT)) {
562 if (unhashed) 769 if (active)
563 dput(unhashed); 770 dput(active);
564 return ERR_PTR(-ERESTARTNOINTR); 771 return ERR_PTR(-ERESTARTNOINTR);
565 } 772 }
566 } 773 }
567 if (!oz_mode) { 774 }
568 spin_lock(&dentry->d_lock); 775
569 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 776 /*
570 spin_unlock(&dentry->d_lock); 777 * User space can (and has done in the past) remove and re-create
778 * this directory during the callback. This can leave us with an
779 * unhashed dentry, but a successful mount! So we need to
780 * perform another cached lookup in case the dentry now exists.
781 */
782 if (!oz_mode && !have_submounts(dentry)) {
783 struct dentry *new;
784 new = d_lookup(dentry->d_parent, &dentry->d_name);
785 if (new) {
786 if (active)
787 dput(active);
788 return new;
789 } else {
790 if (!status)
791 status = -ENOENT;
571 } 792 }
572 } 793 }
573 794
574 /* 795 /*
575 * If this dentry is unhashed, then we shouldn't honour this 796 * If we had a mount failure, return status to user space.
576 * lookup. Returning ENOENT here doesn't do the right thing 797 * If the mount succeeded and we used a dentry from the active queue
577 * for all system calls, but it should be OK for the operations 798 * return it.
578 * we permit from an autofs.
579 */ 799 */
580 if (!oz_mode && d_unhashed(dentry)) { 800 if (status) {
801 dentry = ERR_PTR(status);
802 if (active)
803 dput(active);
804 return dentry;
805 } else {
581 /* 806 /*
582 * A user space application can (and has done in the past) 807 * Valid successful mount, return active dentry or NULL
583 * remove and re-create this directory during the callback. 808 * for a new dentry.
584 * This can leave us with an unhashed dentry, but a
585 * successful mount! So we need to perform another
586 * cached lookup in case the dentry now exists.
587 */ 809 */
588 struct dentry *parent = dentry->d_parent; 810 if (active)
589 struct dentry *new = d_lookup(parent, &dentry->d_name); 811 return active;
590 if (new != NULL)
591 dentry = new;
592 else
593 dentry = ERR_PTR(-ENOENT);
594
595 if (unhashed)
596 dput(unhashed);
597
598 return dentry;
599 } 812 }
600 813
601 if (unhashed)
602 return unhashed;
603
604 return NULL; 814 return NULL;
605} 815}
606 816
@@ -624,11 +834,6 @@ static int autofs4_dir_symlink(struct inode *dir,
624 if (!ino) 834 if (!ino)
625 return -ENOMEM; 835 return -ENOMEM;
626 836
627 spin_lock(&sbi->lookup_lock);
628 if (!list_empty(&ino->active))
629 list_del_init(&ino->active);
630 spin_unlock(&sbi->lookup_lock);
631
632 ino->size = strlen(symname); 837 ino->size = strlen(symname);
633 cp = kmalloc(ino->size + 1, GFP_KERNEL); 838 cp = kmalloc(ino->size + 1, GFP_KERNEL);
634 if (!cp) { 839 if (!cp) {
@@ -705,10 +910,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
705 dir->i_mtime = CURRENT_TIME; 910 dir->i_mtime = CURRENT_TIME;
706 911
707 spin_lock(&dcache_lock); 912 spin_lock(&dcache_lock);
708 spin_lock(&sbi->lookup_lock);
709 if (list_empty(&ino->expiring))
710 list_add(&ino->expiring, &sbi->expiring_list);
711 spin_unlock(&sbi->lookup_lock);
712 spin_lock(&dentry->d_lock); 913 spin_lock(&dentry->d_lock);
713 __d_drop(dentry); 914 __d_drop(dentry);
714 spin_unlock(&dentry->d_lock); 915 spin_unlock(&dentry->d_lock);
@@ -734,10 +935,6 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
734 spin_unlock(&dcache_lock); 935 spin_unlock(&dcache_lock);
735 return -ENOTEMPTY; 936 return -ENOTEMPTY;
736 } 937 }
737 spin_lock(&sbi->lookup_lock);
738 if (list_empty(&ino->expiring))
739 list_add(&ino->expiring, &sbi->expiring_list);
740 spin_unlock(&sbi->lookup_lock);
741 spin_lock(&dentry->d_lock); 938 spin_lock(&dentry->d_lock);
742 __d_drop(dentry); 939 __d_drop(dentry);
743 spin_unlock(&dentry->d_lock); 940 spin_unlock(&dentry->d_lock);
@@ -775,11 +972,6 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
775 if (!ino) 972 if (!ino)
776 return -ENOMEM; 973 return -ENOMEM;
777 974
778 spin_lock(&sbi->lookup_lock);
779 if (!list_empty(&ino->active))
780 list_del_init(&ino->active);
781 spin_unlock(&sbi->lookup_lock);
782
783 inode = autofs4_get_inode(dir->i_sb, ino); 975 inode = autofs4_get_inode(dir->i_sb, ino);
784 if (!inode) { 976 if (!inode) {
785 if (!dentry->d_fsdata) 977 if (!dentry->d_fsdata)
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c778..346b69405363 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
32 32
33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); 33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
34static int load_aout_library(struct file*); 34static int load_aout_library(struct file*);
35static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 35static int aout_core_dump(struct coredump_params *cprm);
36 36
37static struct linux_binfmt aout_format = { 37static struct linux_binfmt aout_format = {
38 .module = THIS_MODULE, 38 .module = THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
89 * dumping of the process results in another error.. 89 * dumping of the process results in another error..
90 */ 90 */
91 91
92static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 92static int aout_core_dump(struct coredump_params *cprm)
93{ 93{
94 struct file *file = cprm->file;
94 mm_segment_t fs; 95 mm_segment_t fs;
95 int has_dumped = 0; 96 int has_dumped = 0;
96 unsigned long dump_start, dump_size; 97 unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
108 current->flags |= PF_DUMPCORE; 109 current->flags |= PF_DUMPCORE;
109 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); 110 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
110 dump.u_ar0 = offsetof(struct user, regs); 111 dump.u_ar0 = offsetof(struct user, regs);
111 dump.signal = signr; 112 dump.signal = cprm->signr;
112 aout_dump_thread(regs, &dump); 113 aout_dump_thread(cprm->regs, &dump);
113 114
114/* If the size of the dump file exceeds the rlimit, then see what would happen 115/* If the size of the dump file exceeds the rlimit, then see what would happen
115 if we wrote the stack, but not the data area. */ 116 if we wrote the stack, but not the data area. */
116 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) 117 if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
117 dump.u_dsize = 0; 118 dump.u_dsize = 0;
118 119
119/* Make sure we have enough room to write the stack and data areas. */ 120/* Make sure we have enough room to write the stack and data areas. */
120 if ((dump.u_ssize + 1) * PAGE_SIZE > limit) 121 if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
121 dump.u_ssize = 0; 122 dump.u_ssize = 0;
122 123
123/* make sure we actually have a data and stack area to dump */ 124/* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d15ea1790bfb..edd90c49003c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -44,8 +44,8 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
44 * If we don't support core dumping, then supply a NULL so we 44 * If we don't support core dumping, then supply a NULL so we
45 * don't even try. 45 * don't even try.
46 */ 46 */
47#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 47#ifdef CONFIG_ELF_CORE
48static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 48static int elf_core_dump(struct coredump_params *cprm);
49#else 49#else
50#define elf_core_dump NULL 50#define elf_core_dump NULL
51#endif 51#endif
@@ -1101,12 +1101,7 @@ out:
1101 return error; 1101 return error;
1102} 1102}
1103 1103
1104/* 1104#ifdef CONFIG_ELF_CORE
1105 * Note that some platforms still use traditional core dumps and not
1106 * the ELF core dump. Each platform can select it as appropriate.
1107 */
1108#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1109
1110/* 1105/*
1111 * ELF core dumper 1106 * ELF core dumper
1112 * 1107 *
@@ -1277,8 +1272,9 @@ static int writenote(struct memelfnote *men, struct file *file,
1277} 1272}
1278#undef DUMP_WRITE 1273#undef DUMP_WRITE
1279 1274
1280#define DUMP_WRITE(addr, nr) \ 1275#define DUMP_WRITE(addr, nr) \
1281 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1276 if ((size += (nr)) > cprm->limit || \
1277 !dump_write(cprm->file, (addr), (nr))) \
1282 goto end_coredump; 1278 goto end_coredump;
1283 1279
1284static void fill_elf_header(struct elfhdr *elf, int segs, 1280static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1906,7 +1902,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1906 * and then they are actually written out. If we run out of core limit 1902 * and then they are actually written out. If we run out of core limit
1907 * we just truncate. 1903 * we just truncate.
1908 */ 1904 */
1909static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 1905static int elf_core_dump(struct coredump_params *cprm)
1910{ 1906{
1911 int has_dumped = 0; 1907 int has_dumped = 0;
1912 mm_segment_t fs; 1908 mm_segment_t fs;
@@ -1952,7 +1948,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
1952 * notes. This also sets up the file header. 1948 * notes. This also sets up the file header.
1953 */ 1949 */
1954 if (!fill_note_info(elf, segs + 1, /* including notes section */ 1950 if (!fill_note_info(elf, segs + 1, /* including notes section */
1955 &info, signr, regs)) 1951 &info, cprm->signr, cprm->regs))
1956 goto cleanup; 1952 goto cleanup;
1957 1953
1958 has_dumped = 1; 1954 has_dumped = 1;
@@ -2014,14 +2010,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2014#endif 2010#endif
2015 2011
2016 /* write out the notes section */ 2012 /* write out the notes section */
2017 if (!write_note_info(&info, file, &foffset)) 2013 if (!write_note_info(&info, cprm->file, &foffset))
2018 goto end_coredump; 2014 goto end_coredump;
2019 2015
2020 if (elf_coredump_extra_notes_write(file, &foffset)) 2016 if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2021 goto end_coredump; 2017 goto end_coredump;
2022 2018
2023 /* Align to page */ 2019 /* Align to page */
2024 if (!dump_seek(file, dataoff - foffset)) 2020 if (!dump_seek(cprm->file, dataoff - foffset))
2025 goto end_coredump; 2021 goto end_coredump;
2026 2022
2027 for (vma = first_vma(current, gate_vma); vma != NULL; 2023 for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2038,12 +2034,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2038 page = get_dump_page(addr); 2034 page = get_dump_page(addr);
2039 if (page) { 2035 if (page) {
2040 void *kaddr = kmap(page); 2036 void *kaddr = kmap(page);
2041 stop = ((size += PAGE_SIZE) > limit) || 2037 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2042 !dump_write(file, kaddr, PAGE_SIZE); 2038 !dump_write(cprm->file, kaddr,
2039 PAGE_SIZE);
2043 kunmap(page); 2040 kunmap(page);
2044 page_cache_release(page); 2041 page_cache_release(page);
2045 } else 2042 } else
2046 stop = !dump_seek(file, PAGE_SIZE); 2043 stop = !dump_seek(cprm->file, PAGE_SIZE);
2047 if (stop) 2044 if (stop)
2048 goto end_coredump; 2045 goto end_coredump;
2049 } 2046 }
@@ -2063,7 +2060,7 @@ out:
2063 return has_dumped; 2060 return has_dumped;
2064} 2061}
2065 2062
2066#endif /* USE_ELF_CORE_DUMP */ 2063#endif /* CONFIG_ELF_CORE */
2067 2064
2068static int __init init_elf_binfmt(void) 2065static int __init init_elf_binfmt(void)
2069{ 2066{
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 38502c67987c..c57d9ce5ff7e 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -75,14 +75,14 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
75static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *, 75static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
76 struct file *, struct mm_struct *); 76 struct file *, struct mm_struct *);
77 77
78#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 78#ifdef CONFIG_ELF_CORE
79static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit); 79static int elf_fdpic_core_dump(struct coredump_params *cprm);
80#endif 80#endif
81 81
82static struct linux_binfmt elf_fdpic_format = { 82static struct linux_binfmt elf_fdpic_format = {
83 .module = THIS_MODULE, 83 .module = THIS_MODULE,
84 .load_binary = load_elf_fdpic_binary, 84 .load_binary = load_elf_fdpic_binary,
85#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 85#ifdef CONFIG_ELF_CORE
86 .core_dump = elf_fdpic_core_dump, 86 .core_dump = elf_fdpic_core_dump,
87#endif 87#endif
88 .min_coredump = ELF_EXEC_PAGESIZE, 88 .min_coredump = ELF_EXEC_PAGESIZE,
@@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
171#ifdef ELF_FDPIC_PLAT_INIT 171#ifdef ELF_FDPIC_PLAT_INIT
172 unsigned long dynaddr; 172 unsigned long dynaddr;
173#endif 173#endif
174#ifndef CONFIG_MMU
175 unsigned long stack_prot;
176#endif
174 struct file *interpreter = NULL; /* to shut gcc up */ 177 struct file *interpreter = NULL; /* to shut gcc up */
175 char *interpreter_name = NULL; 178 char *interpreter_name = NULL;
176 int executable_stack; 179 int executable_stack;
@@ -316,6 +319,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
316 * defunct, deceased, etc. after this point we have to exit via 319 * defunct, deceased, etc. after this point we have to exit via
317 * error_kill */ 320 * error_kill */
318 set_personality(PER_LINUX_FDPIC); 321 set_personality(PER_LINUX_FDPIC);
322 if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
323 current->personality |= READ_IMPLIES_EXEC;
319 set_binfmt(&elf_fdpic_format); 324 set_binfmt(&elf_fdpic_format);
320 325
321 current->mm->start_code = 0; 326 current->mm->start_code = 0;
@@ -377,10 +382,15 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
377 if (stack_size < PAGE_SIZE * 2) 382 if (stack_size < PAGE_SIZE * 2)
378 stack_size = PAGE_SIZE * 2; 383 stack_size = PAGE_SIZE * 2;
379 384
385 stack_prot = PROT_READ | PROT_WRITE;
386 if (executable_stack == EXSTACK_ENABLE_X ||
387 (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
388 stack_prot |= PROT_EXEC;
389
380 down_write(&current->mm->mmap_sem); 390 down_write(&current->mm->mmap_sem);
381 current->mm->start_brk = do_mmap(NULL, 0, stack_size, 391 current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
382 PROT_READ | PROT_WRITE | PROT_EXEC, 392 MAP_PRIVATE | MAP_ANONYMOUS |
383 MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, 393 MAP_UNINITIALIZED | MAP_GROWSDOWN,
384 0); 394 0);
385 395
386 if (IS_ERR_VALUE(current->mm->start_brk)) { 396 if (IS_ERR_VALUE(current->mm->start_brk)) {
@@ -1200,7 +1210,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1200 * 1210 *
1201 * Modelled on fs/binfmt_elf.c core dumper 1211 * Modelled on fs/binfmt_elf.c core dumper
1202 */ 1212 */
1203#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 1213#ifdef CONFIG_ELF_CORE
1204 1214
1205/* 1215/*
1206 * These are the only things you should do on a core-file: use only these 1216 * These are the only things you should do on a core-file: use only these
@@ -1325,8 +1335,9 @@ static int writenote(struct memelfnote *men, struct file *file)
1325#undef DUMP_WRITE 1335#undef DUMP_WRITE
1326#undef DUMP_SEEK 1336#undef DUMP_SEEK
1327 1337
1328#define DUMP_WRITE(addr, nr) \ 1338#define DUMP_WRITE(addr, nr) \
1329 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ 1339 if ((size += (nr)) > cprm->limit || \
1340 !dump_write(cprm->file, (addr), (nr))) \
1330 goto end_coredump; 1341 goto end_coredump;
1331 1342
1332static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) 1343static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1581,8 +1592,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1581 * and then they are actually written out. If we run out of core limit 1592 * and then they are actually written out. If we run out of core limit
1582 * we just truncate. 1593 * we just truncate.
1583 */ 1594 */
1584static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, 1595static int elf_fdpic_core_dump(struct coredump_params *cprm)
1585 struct file *file, unsigned long limit)
1586{ 1596{
1587#define NUM_NOTES 6 1597#define NUM_NOTES 6
1588 int has_dumped = 0; 1598 int has_dumped = 0;
@@ -1641,7 +1651,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1641 goto cleanup; 1651 goto cleanup;
1642#endif 1652#endif
1643 1653
1644 if (signr) { 1654 if (cprm->signr) {
1645 struct core_thread *ct; 1655 struct core_thread *ct;
1646 struct elf_thread_status *tmp; 1656 struct elf_thread_status *tmp;
1647 1657
@@ -1660,14 +1670,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1660 int sz; 1670 int sz;
1661 1671
1662 tmp = list_entry(t, struct elf_thread_status, list); 1672 tmp = list_entry(t, struct elf_thread_status, list);
1663 sz = elf_dump_thread_status(signr, tmp); 1673 sz = elf_dump_thread_status(cprm->signr, tmp);
1664 thread_status_size += sz; 1674 thread_status_size += sz;
1665 } 1675 }
1666 } 1676 }
1667 1677
1668 /* now collect the dump for the current */ 1678 /* now collect the dump for the current */
1669 fill_prstatus(prstatus, current, signr); 1679 fill_prstatus(prstatus, current, cprm->signr);
1670 elf_core_copy_regs(&prstatus->pr_reg, regs); 1680 elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
1671 1681
1672 segs = current->mm->map_count; 1682 segs = current->mm->map_count;
1673#ifdef ELF_CORE_EXTRA_PHDRS 1683#ifdef ELF_CORE_EXTRA_PHDRS
@@ -1702,7 +1712,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1702 1712
1703 /* Try to dump the FPU. */ 1713 /* Try to dump the FPU. */
1704 if ((prstatus->pr_fpvalid = 1714 if ((prstatus->pr_fpvalid =
1705 elf_core_copy_task_fpregs(current, regs, fpu))) 1715 elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
1706 fill_note(notes + numnote++, 1716 fill_note(notes + numnote++,
1707 "CORE", NT_PRFPREG, sizeof(*fpu), fpu); 1717 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1708#ifdef ELF_CORE_COPY_XFPREGS 1718#ifdef ELF_CORE_COPY_XFPREGS
@@ -1773,7 +1783,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1773 1783
1774 /* write out the notes section */ 1784 /* write out the notes section */
1775 for (i = 0; i < numnote; i++) 1785 for (i = 0; i < numnote; i++)
1776 if (!writenote(notes + i, file)) 1786 if (!writenote(notes + i, cprm->file))
1777 goto end_coredump; 1787 goto end_coredump;
1778 1788
1779 /* write out the thread status notes section */ 1789 /* write out the thread status notes section */
@@ -1782,25 +1792,26 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1782 list_entry(t, struct elf_thread_status, list); 1792 list_entry(t, struct elf_thread_status, list);
1783 1793
1784 for (i = 0; i < tmp->num_notes; i++) 1794 for (i = 0; i < tmp->num_notes; i++)
1785 if (!writenote(&tmp->notes[i], file)) 1795 if (!writenote(&tmp->notes[i], cprm->file))
1786 goto end_coredump; 1796 goto end_coredump;
1787 } 1797 }
1788 1798
1789 if (!dump_seek(file, dataoff)) 1799 if (!dump_seek(cprm->file, dataoff))
1790 goto end_coredump; 1800 goto end_coredump;
1791 1801
1792 if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) 1802 if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
1803 mm_flags) < 0)
1793 goto end_coredump; 1804 goto end_coredump;
1794 1805
1795#ifdef ELF_CORE_WRITE_EXTRA_DATA 1806#ifdef ELF_CORE_WRITE_EXTRA_DATA
1796 ELF_CORE_WRITE_EXTRA_DATA; 1807 ELF_CORE_WRITE_EXTRA_DATA;
1797#endif 1808#endif
1798 1809
1799 if (file->f_pos != offset) { 1810 if (cprm->file->f_pos != offset) {
1800 /* Sanity check */ 1811 /* Sanity check */
1801 printk(KERN_WARNING 1812 printk(KERN_WARNING
1802 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n", 1813 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
1803 file->f_pos, offset); 1814 cprm->file->f_pos, offset);
1804 } 1815 }
1805 1816
1806end_coredump: 1817end_coredump:
@@ -1825,4 +1836,4 @@ cleanup:
1825#undef NUM_NOTES 1836#undef NUM_NOTES
1826} 1837}
1827 1838
1828#endif /* USE_ELF_CORE_DUMP */ 1839#endif /* CONFIG_ELF_CORE */
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e756..d4a00ea1054c 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
87#endif 87#endif
88 88
89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); 89static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
90static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); 90static int flat_core_dump(struct coredump_params *cprm);
91 91
92static struct linux_binfmt flat_format = { 92static struct linux_binfmt flat_format = {
93 .module = THIS_MODULE, 93 .module = THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
102 * Currently only a stub-function. 102 * Currently only a stub-function.
103 */ 103 */
104 104
105static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) 105static int flat_core_dump(struct coredump_params *cprm)
106{ 106{
107 printk("Process %s:%d received signr %d and should have core dumped\n", 107 printk("Process %s:%d received signr %d and should have core dumped\n",
108 current->comm, current->pid, (int) signr); 108 current->comm, current->pid, (int) cprm->signr);
109 return(1); 109 return(1);
110} 110}
111 111
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e77..2a9b5330cc5e 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
43 * don't even try. 43 * don't even try.
44 */ 44 */
45#if 0 45#if 0
46static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit); 46static int som_core_dump(struct coredump_params *cprm);
47#else 47#else
48#define som_core_dump NULL 48#define som_core_dump NULL
49#endif 49#endif
diff --git a/fs/bio.c b/fs/bio.c
index 76e6713abf94..12429c9553eb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -78,7 +78,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
78 78
79 i = 0; 79 i = 0;
80 while (i < bio_slab_nr) { 80 while (i < bio_slab_nr) {
81 struct bio_slab *bslab = &bio_slabs[i]; 81 bslab = &bio_slabs[i];
82 82
83 if (!bslab->slab && entry == -1) 83 if (!bslab->slab && entry == -1)
84 entry = i; 84 entry = i;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 361604244271..54f4798ab46a 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
73 return acl; 73 return acl;
74} 74}
75 75
76static int btrfs_xattr_get_acl(struct inode *inode, int type, 76static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
77 void *value, size_t size) 77 void *value, size_t size, int type)
78{ 78{
79 struct posix_acl *acl; 79 struct posix_acl *acl;
80 int ret = 0; 80 int ret = 0;
81 81
82 acl = btrfs_get_acl(inode, type); 82 acl = btrfs_get_acl(dentry->d_inode, type);
83 83
84 if (IS_ERR(acl)) 84 if (IS_ERR(acl))
85 return PTR_ERR(acl); 85 return PTR_ERR(acl);
@@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
94/* 94/*
95 * Needs to be called with fs_mutex held 95 * Needs to be called with fs_mutex held
96 */ 96 */
97static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 97static int btrfs_set_acl(struct btrfs_trans_handle *trans,
98 struct inode *inode, struct posix_acl *acl, int type)
98{ 99{
99 int ret, size = 0; 100 int ret, size = 0;
100 const char *name; 101 const char *name;
@@ -111,12 +112,14 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
111 switch (type) { 112 switch (type) {
112 case ACL_TYPE_ACCESS: 113 case ACL_TYPE_ACCESS:
113 mode = inode->i_mode; 114 mode = inode->i_mode;
114 ret = posix_acl_equiv_mode(acl, &mode);
115 if (ret < 0)
116 return ret;
117 ret = 0;
118 inode->i_mode = mode;
119 name = POSIX_ACL_XATTR_ACCESS; 115 name = POSIX_ACL_XATTR_ACCESS;
116 if (acl) {
117 ret = posix_acl_equiv_mode(acl, &mode);
118 if (ret < 0)
119 return ret;
120 inode->i_mode = mode;
121 }
122 ret = 0;
120 break; 123 break;
121 case ACL_TYPE_DEFAULT: 124 case ACL_TYPE_DEFAULT:
122 if (!S_ISDIR(inode->i_mode)) 125 if (!S_ISDIR(inode->i_mode))
@@ -140,8 +143,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
140 goto out; 143 goto out;
141 } 144 }
142 145
143 ret = __btrfs_setxattr(inode, name, value, size, 0); 146 ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
144
145out: 147out:
146 kfree(value); 148 kfree(value);
147 149
@@ -151,10 +153,10 @@ out:
151 return ret; 153 return ret;
152} 154}
153 155
154static int btrfs_xattr_set_acl(struct inode *inode, int type, 156static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
155 const void *value, size_t size) 157 const void *value, size_t size, int flags, int type)
156{ 158{
157 int ret = 0; 159 int ret;
158 struct posix_acl *acl = NULL; 160 struct posix_acl *acl = NULL;
159 161
160 if (value) { 162 if (value) {
@@ -167,38 +169,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
167 } 169 }
168 } 170 }
169 171
170 ret = btrfs_set_acl(inode, acl, type); 172 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
171 173
172 posix_acl_release(acl); 174 posix_acl_release(acl);
173 175
174 return ret; 176 return ret;
175} 177}
176 178
177
178static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
182}
183
184static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
185 const void *value, size_t size, int flags)
186{
187 return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
188}
189
190static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
191 void *value, size_t size)
192{
193 return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
194}
195
196static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
197 const void *value, size_t size, int flags)
198{
199 return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
200}
201
202int btrfs_check_acl(struct inode *inode, int mask) 179int btrfs_check_acl(struct inode *inode, int mask)
203{ 180{
204 struct posix_acl *acl; 181 struct posix_acl *acl;
@@ -221,7 +198,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
221 * stuff has been fixed to work with that. If the locking stuff changes, we 198 * stuff has been fixed to work with that. If the locking stuff changes, we
222 * need to re-evaluate the acl locking stuff. 199 * need to re-evaluate the acl locking stuff.
223 */ 200 */
224int btrfs_init_acl(struct inode *inode, struct inode *dir) 201int btrfs_init_acl(struct btrfs_trans_handle *trans,
202 struct inode *inode, struct inode *dir)
225{ 203{
226 struct posix_acl *acl = NULL; 204 struct posix_acl *acl = NULL;
227 int ret = 0; 205 int ret = 0;
@@ -246,7 +224,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
246 mode_t mode; 224 mode_t mode;
247 225
248 if (S_ISDIR(inode->i_mode)) { 226 if (S_ISDIR(inode->i_mode)) {
249 ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); 227 ret = btrfs_set_acl(trans, inode, acl,
228 ACL_TYPE_DEFAULT);
250 if (ret) 229 if (ret)
251 goto failed; 230 goto failed;
252 } 231 }
@@ -261,7 +240,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
261 inode->i_mode = mode; 240 inode->i_mode = mode;
262 if (ret > 0) { 241 if (ret > 0) {
263 /* we need an acl */ 242 /* we need an acl */
264 ret = btrfs_set_acl(inode, clone, 243 ret = btrfs_set_acl(trans, inode, clone,
265 ACL_TYPE_ACCESS); 244 ACL_TYPE_ACCESS);
266 } 245 }
267 } 246 }
@@ -294,7 +273,7 @@ int btrfs_acl_chmod(struct inode *inode)
294 273
295 ret = posix_acl_chmod_masq(clone, inode->i_mode); 274 ret = posix_acl_chmod_masq(clone, inode->i_mode);
296 if (!ret) 275 if (!ret)
297 ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); 276 ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
298 277
299 posix_acl_release(clone); 278 posix_acl_release(clone);
300 279
@@ -303,14 +282,16 @@ int btrfs_acl_chmod(struct inode *inode)
303 282
304struct xattr_handler btrfs_xattr_acl_default_handler = { 283struct xattr_handler btrfs_xattr_acl_default_handler = {
305 .prefix = POSIX_ACL_XATTR_DEFAULT, 284 .prefix = POSIX_ACL_XATTR_DEFAULT,
306 .get = btrfs_xattr_acl_default_get, 285 .flags = ACL_TYPE_DEFAULT,
307 .set = btrfs_xattr_acl_default_set, 286 .get = btrfs_xattr_acl_get,
287 .set = btrfs_xattr_acl_set,
308}; 288};
309 289
310struct xattr_handler btrfs_xattr_acl_access_handler = { 290struct xattr_handler btrfs_xattr_acl_access_handler = {
311 .prefix = POSIX_ACL_XATTR_ACCESS, 291 .prefix = POSIX_ACL_XATTR_ACCESS,
312 .get = btrfs_xattr_acl_access_get, 292 .flags = ACL_TYPE_ACCESS,
313 .set = btrfs_xattr_acl_access_set, 293 .get = btrfs_xattr_acl_get,
294 .set = btrfs_xattr_acl_set,
314}; 295};
315 296
316#else /* CONFIG_BTRFS_FS_POSIX_ACL */ 297#else /* CONFIG_BTRFS_FS_POSIX_ACL */
@@ -320,7 +301,8 @@ int btrfs_acl_chmod(struct inode *inode)
320 return 0; 301 return 0;
321} 302}
322 303
323int btrfs_init_acl(struct inode *inode, struct inode *dir) 304int btrfs_init_acl(struct btrfs_trans_handle *trans,
305 struct inode *inode, struct inode *dir)
324{ 306{
325 return 0; 307 return 0;
326} 308}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f010..3f1f50d9d916 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
44 */ 44 */
45 struct extent_io_tree io_failure_tree; 45 struct extent_io_tree io_failure_tree;
46 46
47 /* held while inesrting or deleting extents from files */
48 struct mutex extent_mutex;
49
50 /* held while logging the inode in tree-log.c */ 47 /* held while logging the inode in tree-log.c */
51 struct mutex log_mutex; 48 struct mutex log_mutex;
52 49
@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
166 163
167static inline void btrfs_i_size_write(struct inode *inode, u64 size) 164static inline void btrfs_i_size_write(struct inode *inode, u64 size)
168{ 165{
169 inode->i_size = size; 166 i_size_write(inode, size);
170 BTRFS_I(inode)->disk_i_size = size; 167 BTRFS_I(inode)->disk_i_size = size;
171} 168}
172 169
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d536..c4bc570a396e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *src_buf); 37 struct extent_buffer *src_buf);
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 39 struct btrfs_path *path, int level, int slot);
40static int setup_items_for_insert(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root, struct btrfs_path *path,
42 struct btrfs_key *cpu_key, u32 *data_size,
43 u32 total_data, u32 total_size, int nr);
44
40 45
41struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
42{ 47{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
451 extent_buffer_get(cow); 456 extent_buffer_get(cow);
452 spin_unlock(&root->node_lock); 457 spin_unlock(&root->node_lock);
453 458
454 btrfs_free_extent(trans, root, buf->start, buf->len, 459 btrfs_free_tree_block(trans, root, buf->start, buf->len,
455 parent_start, root->root_key.objectid, 460 parent_start, root->root_key.objectid, level);
456 level, 0);
457 free_extent_buffer(buf); 461 free_extent_buffer(buf);
458 add_root_to_dirty_list(root); 462 add_root_to_dirty_list(root);
459 } else { 463 } else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
468 btrfs_set_node_ptr_generation(parent, parent_slot, 472 btrfs_set_node_ptr_generation(parent, parent_slot,
469 trans->transid); 473 trans->transid);
470 btrfs_mark_buffer_dirty(parent); 474 btrfs_mark_buffer_dirty(parent);
471 btrfs_free_extent(trans, root, buf->start, buf->len, 475 btrfs_free_tree_block(trans, root, buf->start, buf->len,
472 parent_start, root->root_key.objectid, 476 parent_start, root->root_key.objectid, level);
473 level, 0);
474 } 477 }
475 if (unlock_orig) 478 if (unlock_orig)
476 btrfs_tree_unlock(buf); 479 btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1030 btrfs_tree_unlock(mid); 1033 btrfs_tree_unlock(mid);
1031 /* once for the path */ 1034 /* once for the path */
1032 free_extent_buffer(mid); 1035 free_extent_buffer(mid);
1033 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1036 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
1034 0, root->root_key.objectid, level, 1); 1037 0, root->root_key.objectid, level);
1035 /* once for the root ptr */ 1038 /* once for the root ptr */
1036 free_extent_buffer(mid); 1039 free_extent_buffer(mid);
1037 return ret; 1040 return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1095 1); 1098 1);
1096 if (wret) 1099 if (wret)
1097 ret = wret; 1100 ret = wret;
1098 wret = btrfs_free_extent(trans, root, bytenr, 1101 wret = btrfs_free_tree_block(trans, root,
1099 blocksize, 0, 1102 bytenr, blocksize, 0,
1100 root->root_key.objectid, 1103 root->root_key.objectid,
1101 level, 0); 1104 level);
1102 if (wret) 1105 if (wret)
1103 ret = wret; 1106 ret = wret;
1104 } else { 1107 } else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1143 wret = del_ptr(trans, root, path, level + 1, pslot); 1146 wret = del_ptr(trans, root, path, level + 1, pslot);
1144 if (wret) 1147 if (wret)
1145 ret = wret; 1148 ret = wret;
1146 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1149 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
1147 0, root->root_key.objectid, 1150 0, root->root_key.objectid, level);
1148 level, 0);
1149 if (wret) 1151 if (wret)
1150 ret = wret; 1152 ret = wret;
1151 } else { 1153 } else {
@@ -2997,75 +2999,85 @@ again:
2997 return ret; 2999 return ret;
2998} 3000}
2999 3001
3000/* 3002static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3001 * This function splits a single item into two items, 3003 struct btrfs_root *root,
3002 * giving 'new_key' to the new item and splitting the 3004 struct btrfs_path *path, int ins_len)
3003 * old one at split_offset (from the start of the item).
3004 *
3005 * The path may be released by this operation. After
3006 * the split, the path is pointing to the old item. The
3007 * new item is going to be in the same node as the old one.
3008 *
3009 * Note, the item being split must be smaller enough to live alone on
3010 * a tree block with room for one extra struct btrfs_item
3011 *
3012 * This allows us to split the item in place, keeping a lock on the
3013 * leaf the entire time.
3014 */
3015int btrfs_split_item(struct btrfs_trans_handle *trans,
3016 struct btrfs_root *root,
3017 struct btrfs_path *path,
3018 struct btrfs_key *new_key,
3019 unsigned long split_offset)
3020{ 3005{
3021 u32 item_size; 3006 struct btrfs_key key;
3022 struct extent_buffer *leaf; 3007 struct extent_buffer *leaf;
3023 struct btrfs_key orig_key; 3008 struct btrfs_file_extent_item *fi;
3024 struct btrfs_item *item; 3009 u64 extent_len = 0;
3025 struct btrfs_item *new_item; 3010 u32 item_size;
3026 int ret = 0; 3011 int ret;
3027 int slot;
3028 u32 nritems;
3029 u32 orig_offset;
3030 struct btrfs_disk_key disk_key;
3031 char *buf;
3032 3012
3033 leaf = path->nodes[0]; 3013 leaf = path->nodes[0];
3034 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 3014 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3035 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 3015
3036 goto split; 3016 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3017 key.type != BTRFS_EXTENT_CSUM_KEY);
3018
3019 if (btrfs_leaf_free_space(root, leaf) >= ins_len)
3020 return 0;
3037 3021
3038 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3022 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3023 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3024 fi = btrfs_item_ptr(leaf, path->slots[0],
3025 struct btrfs_file_extent_item);
3026 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3027 }
3039 btrfs_release_path(root, path); 3028 btrfs_release_path(root, path);
3040 3029
3041 path->search_for_split = 1;
3042 path->keep_locks = 1; 3030 path->keep_locks = 1;
3043 3031 path->search_for_split = 1;
3044 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 3032 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3045 path->search_for_split = 0; 3033 path->search_for_split = 0;
3034 if (ret < 0)
3035 goto err;
3046 3036
3037 ret = -EAGAIN;
3038 leaf = path->nodes[0];
3047 /* if our item isn't there or got smaller, return now */ 3039 /* if our item isn't there or got smaller, return now */
3048 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3049 path->slots[0])) { 3041 goto err;
3050 path->keep_locks = 0; 3042
3051 return -EAGAIN; 3043 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item);
3046 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3047 goto err;
3052 } 3048 }
3053 3049
3054 btrfs_set_path_blocking(path); 3050 btrfs_set_path_blocking(path);
3055 ret = split_leaf(trans, root, &orig_key, path, 3051 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3056 sizeof(struct btrfs_item), 1);
3057 path->keep_locks = 0;
3058 BUG_ON(ret); 3052 BUG_ON(ret);
3059 3053
3054 path->keep_locks = 0;
3060 btrfs_unlock_up_safe(path, 1); 3055 btrfs_unlock_up_safe(path, 1);
3056 return 0;
3057err:
3058 path->keep_locks = 0;
3059 return ret;
3060}
3061
3062static noinline int split_item(struct btrfs_trans_handle *trans,
3063 struct btrfs_root *root,
3064 struct btrfs_path *path,
3065 struct btrfs_key *new_key,
3066 unsigned long split_offset)
3067{
3068 struct extent_buffer *leaf;
3069 struct btrfs_item *item;
3070 struct btrfs_item *new_item;
3071 int slot;
3072 char *buf;
3073 u32 nritems;
3074 u32 item_size;
3075 u32 orig_offset;
3076 struct btrfs_disk_key disk_key;
3077
3061 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
3062 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3079 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3063 3080
3064split:
3065 /*
3066 * make sure any changes to the path from split_leaf leave it
3067 * in a blocking state
3068 */
3069 btrfs_set_path_blocking(path); 3081 btrfs_set_path_blocking(path);
3070 3082
3071 item = btrfs_item_nr(leaf, path->slots[0]); 3083 item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
3073 item_size = btrfs_item_size(leaf, item); 3085 item_size = btrfs_item_size(leaf, item);
3074 3086
3075 buf = kmalloc(item_size, GFP_NOFS); 3087 buf = kmalloc(item_size, GFP_NOFS);
3088 if (!buf)
3089 return -ENOMEM;
3090
3076 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3091 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3077 path->slots[0]), item_size); 3092 path->slots[0]), item_size);
3078 slot = path->slots[0] + 1;
3079 leaf = path->nodes[0];
3080 3093
3094 slot = path->slots[0] + 1;
3081 nritems = btrfs_header_nritems(leaf); 3095 nritems = btrfs_header_nritems(leaf);
3082
3083 if (slot != nritems) { 3096 if (slot != nritems) {
3084 /* shift the items */ 3097 /* shift the items */
3085 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3098 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3086 btrfs_item_nr_offset(slot), 3099 btrfs_item_nr_offset(slot),
3087 (nritems - slot) * sizeof(struct btrfs_item)); 3100 (nritems - slot) * sizeof(struct btrfs_item));
3088
3089 } 3101 }
3090 3102
3091 btrfs_cpu_key_to_disk(&disk_key, new_key); 3103 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
3113 item_size - split_offset); 3125 item_size - split_offset);
3114 btrfs_mark_buffer_dirty(leaf); 3126 btrfs_mark_buffer_dirty(leaf);
3115 3127
3116 ret = 0; 3128 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
3117 if (btrfs_leaf_free_space(root, leaf) < 0) {
3118 btrfs_print_leaf(root, leaf);
3119 BUG();
3120 }
3121 kfree(buf); 3129 kfree(buf);
3130 return 0;
3131}
3132
3133/*
3134 * This function splits a single item into two items,
3135 * giving 'new_key' to the new item and splitting the
3136 * old one at split_offset (from the start of the item).
3137 *
3138 * The path may be released by this operation. After
3139 * the split, the path is pointing to the old item. The
3140 * new item is going to be in the same node as the old one.
3141 *
3142 * Note, the item being split must be smaller enough to live alone on
3143 * a tree block with room for one extra struct btrfs_item
3144 *
3145 * This allows us to split the item in place, keeping a lock on the
3146 * leaf the entire time.
3147 */
3148int btrfs_split_item(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct btrfs_key *new_key,
3152 unsigned long split_offset)
3153{
3154 int ret;
3155 ret = setup_leaf_for_split(trans, root, path,
3156 sizeof(struct btrfs_item));
3157 if (ret)
3158 return ret;
3159
3160 ret = split_item(trans, root, path, new_key, split_offset);
3122 return ret; 3161 return ret;
3123} 3162}
3124 3163
3125/* 3164/*
3165 * This function duplicate a item, giving 'new_key' to the new item.
3166 * It guarantees both items live in the same tree leaf and the new item
3167 * is contiguous with the original item.
3168 *
3169 * This allows us to split file extent in place, keeping a lock on the
3170 * leaf the entire time.
3171 */
3172int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct btrfs_key *new_key)
3176{
3177 struct extent_buffer *leaf;
3178 int ret;
3179 u32 item_size;
3180
3181 leaf = path->nodes[0];
3182 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3183 ret = setup_leaf_for_split(trans, root, path,
3184 item_size + sizeof(struct btrfs_item));
3185 if (ret)
3186 return ret;
3187
3188 path->slots[0]++;
3189 ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
3190 item_size, item_size +
3191 sizeof(struct btrfs_item), 1);
3192 BUG_ON(ret);
3193
3194 leaf = path->nodes[0];
3195 memcpy_extent_buffer(leaf,
3196 btrfs_item_ptr_offset(leaf, path->slots[0]),
3197 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3198 item_size);
3199 return 0;
3200}
3201
3202/*
3126 * make the item pointed to by the path smaller. new_size indicates 3203 * make the item pointed to by the path smaller. new_size indicates
3127 * how small to make it, and from_end tells us if we just chop bytes 3204 * how small to make it, and from_end tells us if we just chop bytes
3128 * off the end of the item or if we shift the item to chop bytes off 3205 * off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3714 */ 3791 */
3715 btrfs_unlock_up_safe(path, 0); 3792 btrfs_unlock_up_safe(path, 0);
3716 3793
3717 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3794 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
3718 0, root->root_key.objectid, 0, 0); 3795 0, root->root_key.objectid, 0);
3719 return ret; 3796 return ret;
3720} 3797}
3721/* 3798/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a4..9f806dd04c27 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ 310#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
311 sizeof(struct btrfs_item) - \ 311 sizeof(struct btrfs_item) - \
312 sizeof(struct btrfs_file_extent_item)) 312 sizeof(struct btrfs_file_extent_item))
313#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
314 sizeof(struct btrfs_item) -\
315 sizeof(struct btrfs_dir_item))
313 316
314 317
315/* 318/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 862 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 863 struct rw_semaphore extent_commit_sem;
861 864
862 struct rw_semaphore subvol_sem; 865 struct rw_semaphore cleanup_work_sem;
863 866
867 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 868 struct srcu_struct subvol_srcu;
865 869
866 struct list_head trans_list; 870 struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
868 struct list_head dead_roots; 872 struct list_head dead_roots;
869 struct list_head caching_block_groups; 873 struct list_head caching_block_groups;
870 874
875 spinlock_t delayed_iput_lock;
876 struct list_head delayed_iputs;
877
871 atomic_t nr_async_submits; 878 atomic_t nr_async_submits;
872 atomic_t async_submit_draining; 879 atomic_t async_submit_draining;
873 atomic_t nr_async_bios; 880 atomic_t nr_async_bios;
@@ -1034,12 +1041,12 @@ struct btrfs_root {
1034 int ref_cows; 1041 int ref_cows;
1035 int track_dirty; 1042 int track_dirty;
1036 int in_radix; 1043 int in_radix;
1044 int clean_orphans;
1037 1045
1038 u64 defrag_trans_start; 1046 u64 defrag_trans_start;
1039 struct btrfs_key defrag_progress; 1047 struct btrfs_key defrag_progress;
1040 struct btrfs_key defrag_max; 1048 struct btrfs_key defrag_max;
1041 int defrag_running; 1049 int defrag_running;
1042 int defrag_level;
1043 char *name; 1050 char *name;
1044 int in_sysfs; 1051 int in_sysfs;
1045 1052
@@ -1975,6 +1982,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1975 u64 parent, u64 root_objectid, 1982 u64 parent, u64 root_objectid,
1976 struct btrfs_disk_key *key, int level, 1983 struct btrfs_disk_key *key, int level,
1977 u64 hint, u64 empty_size); 1984 u64 hint, u64 empty_size);
1985int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
1986 struct btrfs_root *root,
1987 u64 bytenr, u32 blocksize,
1988 u64 parent, u64 root_objectid, int level);
1978struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1989struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1979 struct btrfs_root *root, 1990 struct btrfs_root *root,
1980 u64 bytenr, u32 blocksize, 1991 u64 bytenr, u32 blocksize,
@@ -2089,6 +2100,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
2089 struct btrfs_path *path, 2100 struct btrfs_path *path,
2090 struct btrfs_key *new_key, 2101 struct btrfs_key *new_key,
2091 unsigned long split_offset); 2102 unsigned long split_offset);
2103int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2104 struct btrfs_root *root,
2105 struct btrfs_path *path,
2106 struct btrfs_key *new_key);
2092int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2107int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2093 *root, struct btrfs_key *key, struct btrfs_path *p, int 2108 *root, struct btrfs_key *key, struct btrfs_path *p, int
2094 ins_len, int cow); 2109 ins_len, int cow);
@@ -2196,9 +2211,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
2196 struct btrfs_path *path, 2211 struct btrfs_path *path,
2197 struct btrfs_dir_item *di); 2212 struct btrfs_dir_item *di);
2198int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 2213int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
2199 struct btrfs_root *root, const char *name, 2214 struct btrfs_root *root,
2200 u16 name_len, const void *data, u16 data_len, 2215 struct btrfs_path *path, u64 objectid,
2201 u64 dir); 2216 const char *name, u16 name_len,
2217 const void *data, u16 data_len);
2202struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, 2218struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root, 2219 struct btrfs_root *root,
2204 struct btrfs_path *path, u64 dir, 2220 struct btrfs_path *path, u64 dir,
@@ -2292,7 +2308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2292 struct inode *inode, u64 new_size, 2308 struct inode *inode, u64 new_size,
2293 u32 min_type); 2309 u32 min_type);
2294 2310
2295int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2311int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2296int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2312int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2297int btrfs_writepages(struct address_space *mapping, 2313int btrfs_writepages(struct address_space *mapping,
2298 struct writeback_control *wbc); 2314 struct writeback_control *wbc);
@@ -2332,6 +2348,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2332void btrfs_orphan_cleanup(struct btrfs_root *root); 2348void btrfs_orphan_cleanup(struct btrfs_root *root);
2333int btrfs_cont_expand(struct inode *inode, loff_t size); 2349int btrfs_cont_expand(struct inode *inode, loff_t size);
2334int btrfs_invalidate_inodes(struct btrfs_root *root); 2350int btrfs_invalidate_inodes(struct btrfs_root *root);
2351void btrfs_add_delayed_iput(struct inode *inode);
2352void btrfs_run_delayed_iputs(struct btrfs_root *root);
2335extern const struct dentry_operations btrfs_dentry_operations; 2353extern const struct dentry_operations btrfs_dentry_operations;
2336 2354
2337/* ioctl.c */ 2355/* ioctl.c */
@@ -2345,12 +2363,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2345 int skip_pinned); 2363 int skip_pinned);
2346int btrfs_check_file(struct btrfs_root *root, struct inode *inode); 2364int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2347extern const struct file_operations btrfs_file_operations; 2365extern const struct file_operations btrfs_file_operations;
2348int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2366int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2349 struct btrfs_root *root, struct inode *inode, 2367 u64 start, u64 end, u64 *hint_byte, int drop_cache);
2350 u64 start, u64 end, u64 locked_end,
2351 u64 inline_limit, u64 *hint_block, int drop_cache);
2352int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2368int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2353 struct btrfs_root *root,
2354 struct inode *inode, u64 start, u64 end); 2369 struct inode *inode, u64 start, u64 end);
2355int btrfs_release_file(struct inode *inode, struct file *file); 2370int btrfs_release_file(struct inode *inode, struct file *file);
2356 2371
@@ -2380,7 +2395,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
2380#else 2395#else
2381#define btrfs_check_acl NULL 2396#define btrfs_check_acl NULL
2382#endif 2397#endif
2383int btrfs_init_acl(struct inode *inode, struct inode *dir); 2398int btrfs_init_acl(struct btrfs_trans_handle *trans,
2399 struct inode *inode, struct inode *dir);
2384int btrfs_acl_chmod(struct inode *inode); 2400int btrfs_acl_chmod(struct inode *inode);
2385 2401
2386/* relocation.c */ 2402/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519cc..e9103b3baa49 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
68 * into the tree 68 * into the tree
69 */ 69 */
70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, 70int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root, const char *name, 71 struct btrfs_root *root,
72 u16 name_len, const void *data, u16 data_len, 72 struct btrfs_path *path, u64 objectid,
73 u64 dir) 73 const char *name, u16 name_len,
74 const void *data, u16 data_len)
74{ 75{
75 int ret = 0; 76 int ret = 0;
76 struct btrfs_path *path;
77 struct btrfs_dir_item *dir_item; 77 struct btrfs_dir_item *dir_item;
78 unsigned long name_ptr, data_ptr; 78 unsigned long name_ptr, data_ptr;
79 struct btrfs_key key, location; 79 struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
81 struct extent_buffer *leaf; 81 struct extent_buffer *leaf;
82 u32 data_size; 82 u32 data_size;
83 83
84 key.objectid = dir; 84 BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
85
86 key.objectid = objectid;
85 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 87 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
86 key.offset = btrfs_name_hash(name, name_len); 88 key.offset = btrfs_name_hash(name, name_len);
87 path = btrfs_alloc_path();
88 if (!path)
89 return -ENOMEM;
90 if (name_len + data_len + sizeof(struct btrfs_dir_item) >
91 BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
92 return -ENOSPC;
93 89
94 data_size = sizeof(*dir_item) + name_len + data_len; 90 data_size = sizeof(*dir_item) + name_len + data_len;
95 dir_item = insert_with_overflow(trans, root, path, &key, data_size, 91 dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
117 write_extent_buffer(leaf, data, data_ptr, data_len); 113 write_extent_buffer(leaf, data, data_ptr, data_len);
118 btrfs_mark_buffer_dirty(path->nodes[0]); 114 btrfs_mark_buffer_dirty(path->nodes[0]);
119 115
120 btrfs_free_path(path);
121 return ret; 116 return ret;
122} 117}
123 118
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd7450..009e3bd18f23 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 892 root->stripesize = stripesize;
893 root->ref_cows = 0; 893 root->ref_cows = 0;
894 root->track_dirty = 0; 894 root->track_dirty = 0;
895 root->in_radix = 0;
896 root->clean_orphans = 0;
895 897
896 root->fs_info = fs_info; 898 root->fs_info = fs_info;
897 root->objectid = objectid; 899 root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
928 root->defrag_trans_start = fs_info->generation; 930 root->defrag_trans_start = fs_info->generation;
929 init_completion(&root->kobj_unregister); 931 init_completion(&root->kobj_unregister);
930 root->defrag_running = 0; 932 root->defrag_running = 0;
931 root->defrag_level = 0;
932 root->root_key.objectid = objectid; 933 root->root_key.objectid = objectid;
933 root->anon_super.s_root = NULL; 934 root->anon_super.s_root = NULL;
934 root->anon_super.s_dev = 0; 935 root->anon_super.s_dev = 0;
@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
980 981
981 while (1) { 982 while (1) {
982 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 983 ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
983 0, &start, &end, EXTENT_DIRTY); 984 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
984 if (ret) 985 if (ret)
985 break; 986 break;
986 987
987 clear_extent_dirty(&log_root_tree->dirty_log_pages, 988 clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
988 start, end, GFP_NOFS); 989 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
989 } 990 }
990 eb = fs_info->log_root_tree->node; 991 eb = fs_info->log_root_tree->node;
991 992
@@ -1210,8 +1211,10 @@ again:
1210 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1211 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1211 (unsigned long)root->root_key.objectid, 1212 (unsigned long)root->root_key.objectid,
1212 root); 1213 root);
1213 if (ret == 0) 1214 if (ret == 0) {
1214 root->in_radix = 1; 1215 root->in_radix = 1;
1216 root->clean_orphans = 1;
1217 }
1215 spin_unlock(&fs_info->fs_roots_radix_lock); 1218 spin_unlock(&fs_info->fs_roots_radix_lock);
1216 radix_tree_preload_end(); 1219 radix_tree_preload_end();
1217 if (ret) { 1220 if (ret) {
@@ -1225,10 +1228,6 @@ again:
1225 ret = btrfs_find_dead_roots(fs_info->tree_root, 1228 ret = btrfs_find_dead_roots(fs_info->tree_root,
1226 root->root_key.objectid); 1229 root->root_key.objectid);
1227 WARN_ON(ret); 1230 WARN_ON(ret);
1228
1229 if (!(fs_info->sb->s_flags & MS_RDONLY))
1230 btrfs_orphan_cleanup(root);
1231
1232 return root; 1231 return root;
1233fail: 1232fail:
1234 free_fs_root(root); 1233 free_fs_root(root);
@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
1477 1476
1478 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1479 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1478 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1479 btrfs_run_delayed_iputs(root);
1480 btrfs_clean_old_snapshots(root); 1480 btrfs_clean_old_snapshots(root);
1481 mutex_unlock(&root->fs_info->cleaner_mutex); 1481 mutex_unlock(&root->fs_info->cleaner_mutex);
1482 } 1482 }
@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1607 INIT_LIST_HEAD(&fs_info->trans_list); 1607 INIT_LIST_HEAD(&fs_info->trans_list);
1608 INIT_LIST_HEAD(&fs_info->dead_roots); 1608 INIT_LIST_HEAD(&fs_info->dead_roots);
1609 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1609 INIT_LIST_HEAD(&fs_info->hashers); 1610 INIT_LIST_HEAD(&fs_info->hashers);
1610 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1611 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1611 INIT_LIST_HEAD(&fs_info->ordered_operations); 1612 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1614 spin_lock_init(&fs_info->new_trans_lock); 1615 spin_lock_init(&fs_info->new_trans_lock);
1615 spin_lock_init(&fs_info->ref_cache_lock); 1616 spin_lock_init(&fs_info->ref_cache_lock);
1616 spin_lock_init(&fs_info->fs_roots_radix_lock); 1617 spin_lock_init(&fs_info->fs_roots_radix_lock);
1618 spin_lock_init(&fs_info->delayed_iput_lock);
1617 1619
1618 init_completion(&fs_info->kobj_unregister); 1620 init_completion(&fs_info->kobj_unregister);
1619 fs_info->tree_root = tree_root; 1621 fs_info->tree_root = tree_root;
@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 mutex_init(&fs_info->cleaner_mutex); 1691 mutex_init(&fs_info->cleaner_mutex);
1690 mutex_init(&fs_info->volume_mutex); 1692 mutex_init(&fs_info->volume_mutex);
1691 init_rwsem(&fs_info->extent_commit_sem); 1693 init_rwsem(&fs_info->extent_commit_sem);
1694 init_rwsem(&fs_info->cleanup_work_sem);
1692 init_rwsem(&fs_info->subvol_sem); 1695 init_rwsem(&fs_info->subvol_sem);
1693 1696
1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1697 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -2386,8 +2389,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2386 int ret; 2389 int ret;
2387 2390
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2391 mutex_lock(&root->fs_info->cleaner_mutex);
2392 btrfs_run_delayed_iputs(root);
2389 btrfs_clean_old_snapshots(root); 2393 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2394 mutex_unlock(&root->fs_info->cleaner_mutex);
2395
2396 /* wait until ongoing cleanup work is done */
2397 down_write(&root->fs_info->cleanup_work_sem);
2398 up_write(&root->fs_info->cleanup_work_sem);
2399
2391 trans = btrfs_start_transaction(root, 1); 2400 trans = btrfs_start_transaction(root, 1);
2392 ret = btrfs_commit_transaction(trans, root); 2401 ret = btrfs_commit_transaction(trans, root);
2393 BUG_ON(ret); 2402 BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94627c4cc193..432a2da4641e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
83 return (cache->flags & bits) == bits; 83 return (cache->flags & bits) == bits;
84} 84}
85 85
86void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
87{
88 atomic_inc(&cache->count);
89}
90
91void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
92{
93 if (atomic_dec_and_test(&cache->count))
94 kfree(cache);
95}
96
86/* 97/*
87 * this adds the block group to the fs_info rb tree for the block group 98 * this adds the block group to the fs_info rb tree for the block group
88 * cache 99 * cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
156 } 167 }
157 } 168 }
158 if (ret) 169 if (ret)
159 atomic_inc(&ret->count); 170 btrfs_get_block_group(ret);
160 spin_unlock(&info->block_group_cache_lock); 171 spin_unlock(&info->block_group_cache_lock);
161 172
162 return ret; 173 return ret;
@@ -195,6 +206,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
195 int stripe_len; 206 int stripe_len;
196 int i, nr, ret; 207 int i, nr, ret;
197 208
209 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
210 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
211 cache->bytes_super += stripe_len;
212 ret = add_excluded_extent(root, cache->key.objectid,
213 stripe_len);
214 BUG_ON(ret);
215 }
216
198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 217 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
199 bytenr = btrfs_sb_offset(i); 218 bytenr = btrfs_sb_offset(i);
200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 219 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +274,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
255 if (ret) 274 if (ret)
256 break; 275 break;
257 276
258 if (extent_start == start) { 277 if (extent_start <= start) {
259 start = extent_end + 1; 278 start = extent_end + 1;
260 } else if (extent_start > start && extent_start < end) { 279 } else if (extent_start > start && extent_start < end) {
261 size = extent_start - start; 280 size = extent_start - start;
@@ -399,6 +418,8 @@ err:
399 418
400 put_caching_control(caching_ctl); 419 put_caching_control(caching_ctl);
401 atomic_dec(&block_group->space_info->caching_threads); 420 atomic_dec(&block_group->space_info->caching_threads);
421 btrfs_put_block_group(block_group);
422
402 return 0; 423 return 0;
403} 424}
404 425
@@ -439,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
439 up_write(&fs_info->extent_commit_sem); 460 up_write(&fs_info->extent_commit_sem);
440 461
441 atomic_inc(&cache->space_info->caching_threads); 462 atomic_inc(&cache->space_info->caching_threads);
463 btrfs_get_block_group(cache);
442 464
443 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 465 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
444 cache->key.objectid); 466 cache->key.objectid);
@@ -478,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
478 return cache; 500 return cache;
479} 501}
480 502
481void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
482{
483 if (atomic_dec_and_test(&cache->count))
484 kfree(cache);
485}
486
487static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 503static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
488 u64 flags) 504 u64 flags)
489{ 505{
@@ -2574,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
2574 if (node) { 2590 if (node) {
2575 cache = rb_entry(node, struct btrfs_block_group_cache, 2591 cache = rb_entry(node, struct btrfs_block_group_cache,
2576 cache_node); 2592 cache_node);
2577 atomic_inc(&cache->count); 2593 btrfs_get_block_group(cache);
2578 } else 2594 } else
2579 cache = NULL; 2595 cache = NULL;
2580 spin_unlock(&root->fs_info->block_group_cache_lock); 2596 spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -2880,9 +2896,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2880 root = async->root; 2896 root = async->root;
2881 info = async->info; 2897 info = async->info;
2882 2898
2883 btrfs_start_delalloc_inodes(root); 2899 btrfs_start_delalloc_inodes(root, 0);
2884 wake_up(&info->flush_wait); 2900 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0); 2901 btrfs_wait_ordered_extents(root, 0, 0);
2886 2902
2887 spin_lock(&info->lock); 2903 spin_lock(&info->lock);
2888 info->flushing = 0; 2904 info->flushing = 0;
@@ -2956,8 +2972,8 @@ static void flush_delalloc(struct btrfs_root *root,
2956 return; 2972 return;
2957 2973
2958flush: 2974flush:
2959 btrfs_start_delalloc_inodes(root); 2975 btrfs_start_delalloc_inodes(root, 0);
2960 btrfs_wait_ordered_extents(root, 0); 2976 btrfs_wait_ordered_extents(root, 0, 0);
2961 2977
2962 spin_lock(&info->lock); 2978 spin_lock(&info->lock);
2963 info->flushing = 0; 2979 info->flushing = 0;
@@ -3454,14 +3470,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3454 else 3470 else
3455 old_val -= num_bytes; 3471 old_val -= num_bytes;
3456 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3472 btrfs_set_super_bytes_used(&info->super_copy, old_val);
3457
3458 /* block accounting for root item */
3459 old_val = btrfs_root_used(&root->root_item);
3460 if (alloc)
3461 old_val += num_bytes;
3462 else
3463 old_val -= num_bytes;
3464 btrfs_set_root_used(&root->root_item, old_val);
3465 spin_unlock(&info->delalloc_lock); 3473 spin_unlock(&info->delalloc_lock);
3466 3474
3467 while (total) { 3475 while (total) {
@@ -4049,6 +4057,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4049 return ret; 4057 return ret;
4050} 4058}
4051 4059
4060int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4061 struct btrfs_root *root,
4062 u64 bytenr, u32 blocksize,
4063 u64 parent, u64 root_objectid, int level)
4064{
4065 u64 used;
4066 spin_lock(&root->node_lock);
4067 used = btrfs_root_used(&root->root_item) - blocksize;
4068 btrfs_set_root_used(&root->root_item, used);
4069 spin_unlock(&root->node_lock);
4070
4071 return btrfs_free_extent(trans, root, bytenr, blocksize,
4072 parent, root_objectid, level, 0);
4073}
4074
4052static u64 stripe_align(struct btrfs_root *root, u64 val) 4075static u64 stripe_align(struct btrfs_root *root, u64 val)
4053{ 4076{
4054 u64 mask = ((u64)root->stripesize - 1); 4077 u64 mask = ((u64)root->stripesize - 1);
@@ -4212,7 +4235,7 @@ search:
4212 u64 offset; 4235 u64 offset;
4213 int cached; 4236 int cached;
4214 4237
4215 atomic_inc(&block_group->count); 4238 btrfs_get_block_group(block_group);
4216 search_start = block_group->key.objectid; 4239 search_start = block_group->key.objectid;
4217 4240
4218have_block_group: 4241have_block_group:
@@ -4300,7 +4323,7 @@ have_block_group:
4300 4323
4301 btrfs_put_block_group(block_group); 4324 btrfs_put_block_group(block_group);
4302 block_group = last_ptr->block_group; 4325 block_group = last_ptr->block_group;
4303 atomic_inc(&block_group->count); 4326 btrfs_get_block_group(block_group);
4304 spin_unlock(&last_ptr->lock); 4327 spin_unlock(&last_ptr->lock);
4305 spin_unlock(&last_ptr->refill_lock); 4328 spin_unlock(&last_ptr->refill_lock);
4306 4329
@@ -4578,7 +4601,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4578{ 4601{
4579 int ret; 4602 int ret;
4580 u64 search_start = 0; 4603 u64 search_start = 0;
4581 struct btrfs_fs_info *info = root->fs_info;
4582 4604
4583 data = btrfs_get_alloc_profile(root, data); 4605 data = btrfs_get_alloc_profile(root, data);
4584again: 4606again:
@@ -4586,17 +4608,9 @@ again:
4586 * the only place that sets empty_size is btrfs_realloc_node, which 4608 * the only place that sets empty_size is btrfs_realloc_node, which
4587 * is not called recursively on allocations 4609 * is not called recursively on allocations
4588 */ 4610 */
4589 if (empty_size || root->ref_cows) { 4611 if (empty_size || root->ref_cows)
4590 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
4591 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4592 2 * 1024 * 1024,
4593 BTRFS_BLOCK_GROUP_METADATA |
4594 (info->metadata_alloc_profile &
4595 info->avail_metadata_alloc_bits), 0);
4596 }
4597 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4612 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4598 num_bytes + 2 * 1024 * 1024, data, 0); 4613 num_bytes + 2 * 1024 * 1024, data, 0);
4599 }
4600 4614
4601 WARN_ON(num_bytes < root->sectorsize); 4615 WARN_ON(num_bytes < root->sectorsize);
4602 ret = find_free_extent(trans, root, num_bytes, empty_size, 4616 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4897,6 +4911,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4897 extent_op); 4911 extent_op);
4898 BUG_ON(ret); 4912 BUG_ON(ret);
4899 } 4913 }
4914
4915 if (root_objectid == root->root_key.objectid) {
4916 u64 used;
4917 spin_lock(&root->node_lock);
4918 used = btrfs_root_used(&root->root_item) + num_bytes;
4919 btrfs_set_root_used(&root->root_item, used);
4920 spin_unlock(&root->node_lock);
4921 }
4900 return ret; 4922 return ret;
4901} 4923}
4902 4924
@@ -4919,8 +4941,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4919 btrfs_set_buffer_uptodate(buf); 4941 btrfs_set_buffer_uptodate(buf);
4920 4942
4921 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4943 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4922 set_extent_dirty(&root->dirty_log_pages, buf->start, 4944 /*
4923 buf->start + buf->len - 1, GFP_NOFS); 4945 * we allow two log transactions at a time, use different
4946 * EXTENT bits to differentiate dirty pages.
4947 */
4948 if (root->log_transid % 2 == 0)
4949 set_extent_dirty(&root->dirty_log_pages, buf->start,
4950 buf->start + buf->len - 1, GFP_NOFS);
4951 else
4952 set_extent_new(&root->dirty_log_pages, buf->start,
4953 buf->start + buf->len - 1, GFP_NOFS);
4924 } else { 4954 } else {
4925 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4955 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4926 buf->start + buf->len - 1, GFP_NOFS); 4956 buf->start + buf->len - 1, GFP_NOFS);
@@ -7373,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7373 wait_block_group_cache_done(block_group); 7403 wait_block_group_cache_done(block_group);
7374 7404
7375 btrfs_remove_free_space_cache(block_group); 7405 btrfs_remove_free_space_cache(block_group);
7376 7406 btrfs_put_block_group(block_group);
7377 WARN_ON(atomic_read(&block_group->count) != 1);
7378 kfree(block_group);
7379 7407
7380 spin_lock(&info->block_group_cache_lock); 7408 spin_lock(&info->block_group_cache_lock);
7381 } 7409 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77f759302e12..c02033596f02 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
179 } 179 }
180 flags = em->flags; 180 flags = em->flags;
181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 181 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
182 if (em->start <= start && 182 if (testend && em->start + em->len >= start + len) {
183 (!testend || em->start + em->len >= start + len)) {
184 free_extent_map(em); 183 free_extent_map(em);
185 write_unlock(&em_tree->lock); 184 write_unlock(&em_tree->lock);
186 break; 185 break;
187 } 186 }
188 if (start < em->start) { 187 start = em->start + em->len;
189 len = em->start - start; 188 if (testend)
190 } else {
191 len = start + len - (em->start + em->len); 189 len = start + len - (em->start + em->len);
192 start = em->start + em->len;
193 }
194 free_extent_map(em); 190 free_extent_map(em);
195 write_unlock(&em_tree->lock); 191 write_unlock(&em_tree->lock);
196 continue; 192 continue;
@@ -265,324 +261,253 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
265 * If an extent intersects the range but is not entirely inside the range 261 * If an extent intersects the range but is not entirely inside the range
266 * it is either truncated or split. Anything entirely inside the range 262 * it is either truncated or split. Anything entirely inside the range
267 * is deleted from the tree. 263 * is deleted from the tree.
268 *
269 * inline_limit is used to tell this code which offsets in the file to keep
270 * if they contain inline extents.
271 */ 264 */
272noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 265int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
273 struct btrfs_root *root, struct inode *inode, 266 u64 start, u64 end, u64 *hint_byte, int drop_cache)
274 u64 start, u64 end, u64 locked_end,
275 u64 inline_limit, u64 *hint_byte, int drop_cache)
276{ 267{
277 u64 extent_end = 0; 268 struct btrfs_root *root = BTRFS_I(inode)->root;
278 u64 search_start = start;
279 u64 ram_bytes = 0;
280 u64 disk_bytenr = 0;
281 u64 orig_locked_end = locked_end;
282 u8 compression;
283 u8 encryption;
284 u16 other_encoding = 0;
285 struct extent_buffer *leaf; 269 struct extent_buffer *leaf;
286 struct btrfs_file_extent_item *extent; 270 struct btrfs_file_extent_item *fi;
287 struct btrfs_path *path; 271 struct btrfs_path *path;
288 struct btrfs_key key; 272 struct btrfs_key key;
289 struct btrfs_file_extent_item old; 273 struct btrfs_key new_key;
290 int keep; 274 u64 search_start = start;
291 int slot; 275 u64 disk_bytenr = 0;
292 int bookend; 276 u64 num_bytes = 0;
293 int found_type = 0; 277 u64 extent_offset = 0;
294 int found_extent; 278 u64 extent_end = 0;
295 int found_inline; 279 int del_nr = 0;
280 int del_slot = 0;
281 int extent_type;
296 int recow; 282 int recow;
297 int ret; 283 int ret;
298 284
299 inline_limit = 0;
300 if (drop_cache) 285 if (drop_cache)
301 btrfs_drop_extent_cache(inode, start, end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, end - 1, 0);
302 287
303 path = btrfs_alloc_path(); 288 path = btrfs_alloc_path();
304 if (!path) 289 if (!path)
305 return -ENOMEM; 290 return -ENOMEM;
291
306 while (1) { 292 while (1) {
307 recow = 0; 293 recow = 0;
308 btrfs_release_path(root, path);
309 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, 294 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
310 search_start, -1); 295 search_start, -1);
311 if (ret < 0) 296 if (ret < 0)
312 goto out; 297 break;
313 if (ret > 0) { 298 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
314 if (path->slots[0] == 0) { 299 leaf = path->nodes[0];
315 ret = 0; 300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
316 goto out; 301 if (key.objectid == inode->i_ino &&
317 } 302 key.type == BTRFS_EXTENT_DATA_KEY)
318 path->slots[0]--; 303 path->slots[0]--;
319 } 304 }
305 ret = 0;
320next_slot: 306next_slot:
321 keep = 0;
322 bookend = 0;
323 found_extent = 0;
324 found_inline = 0;
325 compression = 0;
326 encryption = 0;
327 extent = NULL;
328 leaf = path->nodes[0]; 307 leaf = path->nodes[0];
329 slot = path->slots[0]; 308 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
330 ret = 0; 309 BUG_ON(del_nr > 0);
331 btrfs_item_key_to_cpu(leaf, &key, slot); 310 ret = btrfs_next_leaf(root, path);
332 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && 311 if (ret < 0)
333 key.offset >= end) { 312 break;
334 goto out; 313 if (ret > 0) {
335 } 314 ret = 0;
336 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 315 break;
337 key.objectid != inode->i_ino) {
338 goto out;
339 }
340 if (recow) {
341 search_start = max(key.offset, start);
342 continue;
343 }
344 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
345 extent = btrfs_item_ptr(leaf, slot,
346 struct btrfs_file_extent_item);
347 found_type = btrfs_file_extent_type(leaf, extent);
348 compression = btrfs_file_extent_compression(leaf,
349 extent);
350 encryption = btrfs_file_extent_encryption(leaf,
351 extent);
352 other_encoding = btrfs_file_extent_other_encoding(leaf,
353 extent);
354 if (found_type == BTRFS_FILE_EXTENT_REG ||
355 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
356 extent_end =
357 btrfs_file_extent_disk_bytenr(leaf,
358 extent);
359 if (extent_end)
360 *hint_byte = extent_end;
361
362 extent_end = key.offset +
363 btrfs_file_extent_num_bytes(leaf, extent);
364 ram_bytes = btrfs_file_extent_ram_bytes(leaf,
365 extent);
366 found_extent = 1;
367 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
368 found_inline = 1;
369 extent_end = key.offset +
370 btrfs_file_extent_inline_len(leaf, extent);
371 } 316 }
317 leaf = path->nodes[0];
318 recow = 1;
319 }
320
321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
322 if (key.objectid > inode->i_ino ||
323 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
324 break;
325
326 fi = btrfs_item_ptr(leaf, path->slots[0],
327 struct btrfs_file_extent_item);
328 extent_type = btrfs_file_extent_type(leaf, fi);
329
330 if (extent_type == BTRFS_FILE_EXTENT_REG ||
331 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
332 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
333 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
334 extent_offset = btrfs_file_extent_offset(leaf, fi);
335 extent_end = key.offset +
336 btrfs_file_extent_num_bytes(leaf, fi);
337 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
338 extent_end = key.offset +
339 btrfs_file_extent_inline_len(leaf, fi);
372 } else { 340 } else {
341 WARN_ON(1);
373 extent_end = search_start; 342 extent_end = search_start;
374 } 343 }
375 344
376 /* we found nothing we can drop */ 345 if (extent_end <= search_start) {
377 if ((!found_extent && !found_inline) || 346 path->slots[0]++;
378 search_start >= extent_end) {
379 int nextret;
380 u32 nritems;
381 nritems = btrfs_header_nritems(leaf);
382 if (slot >= nritems - 1) {
383 nextret = btrfs_next_leaf(root, path);
384 if (nextret)
385 goto out;
386 recow = 1;
387 } else {
388 path->slots[0]++;
389 }
390 goto next_slot; 347 goto next_slot;
391 } 348 }
392 349
393 if (end <= extent_end && start >= key.offset && found_inline) 350 search_start = max(key.offset, start);
394 *hint_byte = EXTENT_MAP_INLINE; 351 if (recow) {
395 352 btrfs_release_path(root, path);
396 if (found_extent) { 353 continue;
397 read_extent_buffer(leaf, &old, (unsigned long)extent,
398 sizeof(old));
399 }
400
401 if (end < extent_end && end >= key.offset) {
402 bookend = 1;
403 if (found_inline && start <= key.offset)
404 keep = 1;
405 } 354 }
406 355
407 if (bookend && found_extent) { 356 /*
408 if (locked_end < extent_end) { 357 * | - range to drop - |
409 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 358 * | -------- extent -------- |
410 locked_end, extent_end - 1, 359 */
411 GFP_NOFS); 360 if (start > key.offset && end < extent_end) {
412 if (!ret) { 361 BUG_ON(del_nr > 0);
413 btrfs_release_path(root, path); 362 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
414 lock_extent(&BTRFS_I(inode)->io_tree, 363
415 locked_end, extent_end - 1, 364 memcpy(&new_key, &key, sizeof(new_key));
416 GFP_NOFS); 365 new_key.offset = start;
417 locked_end = extent_end; 366 ret = btrfs_duplicate_item(trans, root, path,
418 continue; 367 &new_key);
419 } 368 if (ret == -EAGAIN) {
420 locked_end = extent_end; 369 btrfs_release_path(root, path);
370 continue;
421 } 371 }
422 disk_bytenr = le64_to_cpu(old.disk_bytenr); 372 if (ret < 0)
423 if (disk_bytenr != 0) { 373 break;
374
375 leaf = path->nodes[0];
376 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
377 struct btrfs_file_extent_item);
378 btrfs_set_file_extent_num_bytes(leaf, fi,
379 start - key.offset);
380
381 fi = btrfs_item_ptr(leaf, path->slots[0],
382 struct btrfs_file_extent_item);
383
384 extent_offset += start - key.offset;
385 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
386 btrfs_set_file_extent_num_bytes(leaf, fi,
387 extent_end - start);
388 btrfs_mark_buffer_dirty(leaf);
389
390 if (disk_bytenr > 0) {
424 ret = btrfs_inc_extent_ref(trans, root, 391 ret = btrfs_inc_extent_ref(trans, root,
425 disk_bytenr, 392 disk_bytenr, num_bytes, 0,
426 le64_to_cpu(old.disk_num_bytes), 0, 393 root->root_key.objectid,
427 root->root_key.objectid, 394 new_key.objectid,
428 key.objectid, key.offset - 395 start - extent_offset);
429 le64_to_cpu(old.offset));
430 BUG_ON(ret); 396 BUG_ON(ret);
397 *hint_byte = disk_bytenr;
431 } 398 }
399 key.offset = start;
432 } 400 }
401 /*
402 * | ---- range to drop ----- |
403 * | -------- extent -------- |
404 */
405 if (start <= key.offset && end < extent_end) {
406 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
433 407
434 if (found_inline) { 408 memcpy(&new_key, &key, sizeof(new_key));
435 u64 mask = root->sectorsize - 1; 409 new_key.offset = end;
436 search_start = (extent_end + mask) & ~mask; 410 btrfs_set_item_key_safe(trans, root, path, &new_key);
437 } else 411
438 search_start = extent_end; 412 extent_offset += end - key.offset;
439 413 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
440 /* truncate existing extent */ 414 btrfs_set_file_extent_num_bytes(leaf, fi,
441 if (start > key.offset) { 415 extent_end - end);
442 u64 new_num; 416 btrfs_mark_buffer_dirty(leaf);
443 u64 old_num; 417 if (disk_bytenr > 0) {
444 keep = 1; 418 inode_sub_bytes(inode, end - key.offset);
445 WARN_ON(start & (root->sectorsize - 1)); 419 *hint_byte = disk_bytenr;
446 if (found_extent) {
447 new_num = start - key.offset;
448 old_num = btrfs_file_extent_num_bytes(leaf,
449 extent);
450 *hint_byte =
451 btrfs_file_extent_disk_bytenr(leaf,
452 extent);
453 if (btrfs_file_extent_disk_bytenr(leaf,
454 extent)) {
455 inode_sub_bytes(inode, old_num -
456 new_num);
457 }
458 btrfs_set_file_extent_num_bytes(leaf,
459 extent, new_num);
460 btrfs_mark_buffer_dirty(leaf);
461 } else if (key.offset < inline_limit &&
462 (end > extent_end) &&
463 (inline_limit < extent_end)) {
464 u32 new_size;
465 new_size = btrfs_file_extent_calc_inline_size(
466 inline_limit - key.offset);
467 inode_sub_bytes(inode, extent_end -
468 inline_limit);
469 btrfs_set_file_extent_ram_bytes(leaf, extent,
470 new_size);
471 if (!compression && !encryption) {
472 btrfs_truncate_item(trans, root, path,
473 new_size, 1);
474 }
475 } 420 }
421 break;
476 } 422 }
477 /* delete the entire extent */
478 if (!keep) {
479 if (found_inline)
480 inode_sub_bytes(inode, extent_end -
481 key.offset);
482 ret = btrfs_del_item(trans, root, path);
483 /* TODO update progress marker and return */
484 BUG_ON(ret);
485 extent = NULL;
486 btrfs_release_path(root, path);
487 /* the extent will be freed later */
488 }
489 if (bookend && found_inline && start <= key.offset) {
490 u32 new_size;
491 new_size = btrfs_file_extent_calc_inline_size(
492 extent_end - end);
493 inode_sub_bytes(inode, end - key.offset);
494 btrfs_set_file_extent_ram_bytes(leaf, extent,
495 new_size);
496 if (!compression && !encryption)
497 ret = btrfs_truncate_item(trans, root, path,
498 new_size, 0);
499 BUG_ON(ret);
500 }
501 /* create bookend, splitting the extent in two */
502 if (bookend && found_extent) {
503 struct btrfs_key ins;
504 ins.objectid = inode->i_ino;
505 ins.offset = end;
506 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
507 423
508 btrfs_release_path(root, path); 424 search_start = extent_end;
509 path->leave_spinning = 1; 425 /*
510 ret = btrfs_insert_empty_item(trans, root, path, &ins, 426 * | ---- range to drop ----- |
511 sizeof(*extent)); 427 * | -------- extent -------- |
512 BUG_ON(ret); 428 */
429 if (start > key.offset && end >= extent_end) {
430 BUG_ON(del_nr > 0);
431 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
513 432
514 leaf = path->nodes[0]; 433 btrfs_set_file_extent_num_bytes(leaf, fi,
515 extent = btrfs_item_ptr(leaf, path->slots[0], 434 start - key.offset);
516 struct btrfs_file_extent_item); 435 btrfs_mark_buffer_dirty(leaf);
517 write_extent_buffer(leaf, &old, 436 if (disk_bytenr > 0) {
518 (unsigned long)extent, sizeof(old)); 437 inode_sub_bytes(inode, extent_end - start);
519 438 *hint_byte = disk_bytenr;
520 btrfs_set_file_extent_compression(leaf, extent, 439 }
521 compression); 440 if (end == extent_end)
522 btrfs_set_file_extent_encryption(leaf, extent, 441 break;
523 encryption);
524 btrfs_set_file_extent_other_encoding(leaf, extent,
525 other_encoding);
526 btrfs_set_file_extent_offset(leaf, extent,
527 le64_to_cpu(old.offset) + end - key.offset);
528 WARN_ON(le64_to_cpu(old.num_bytes) <
529 (extent_end - end));
530 btrfs_set_file_extent_num_bytes(leaf, extent,
531 extent_end - end);
532 442
533 /* 443 path->slots[0]++;
534 * set the ram bytes to the size of the full extent 444 goto next_slot;
535 * before splitting. This is a worst case flag,
536 * but its the best we can do because we don't know
537 * how splitting affects compression
538 */
539 btrfs_set_file_extent_ram_bytes(leaf, extent,
540 ram_bytes);
541 btrfs_set_file_extent_type(leaf, extent, found_type);
542
543 btrfs_unlock_up_safe(path, 1);
544 btrfs_mark_buffer_dirty(path->nodes[0]);
545 btrfs_set_lock_blocking(path->nodes[0]);
546
547 path->leave_spinning = 0;
548 btrfs_release_path(root, path);
549 if (disk_bytenr != 0)
550 inode_add_bytes(inode, extent_end - end);
551 } 445 }
552 446
553 if (found_extent && !keep) { 447 /*
554 u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); 448 * | ---- range to drop ----- |
449 * | ------ extent ------ |
450 */
451 if (start <= key.offset && end >= extent_end) {
452 if (del_nr == 0) {
453 del_slot = path->slots[0];
454 del_nr = 1;
455 } else {
456 BUG_ON(del_slot + del_nr != path->slots[0]);
457 del_nr++;
458 }
555 459
556 if (old_disk_bytenr != 0) { 460 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
557 inode_sub_bytes(inode, 461 inode_sub_bytes(inode,
558 le64_to_cpu(old.num_bytes)); 462 extent_end - key.offset);
463 extent_end = ALIGN(extent_end,
464 root->sectorsize);
465 } else if (disk_bytenr > 0) {
559 ret = btrfs_free_extent(trans, root, 466 ret = btrfs_free_extent(trans, root,
560 old_disk_bytenr, 467 disk_bytenr, num_bytes, 0,
561 le64_to_cpu(old.disk_num_bytes), 468 root->root_key.objectid,
562 0, root->root_key.objectid,
563 key.objectid, key.offset - 469 key.objectid, key.offset -
564 le64_to_cpu(old.offset)); 470 extent_offset);
565 BUG_ON(ret); 471 BUG_ON(ret);
566 *hint_byte = old_disk_bytenr; 472 inode_sub_bytes(inode,
473 extent_end - key.offset);
474 *hint_byte = disk_bytenr;
567 } 475 }
568 }
569 476
570 if (search_start >= end) { 477 if (end == extent_end)
571 ret = 0; 478 break;
572 goto out; 479
480 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
481 path->slots[0]++;
482 goto next_slot;
483 }
484
485 ret = btrfs_del_items(trans, root, path, del_slot,
486 del_nr);
487 BUG_ON(ret);
488
489 del_nr = 0;
490 del_slot = 0;
491
492 btrfs_release_path(root, path);
493 continue;
573 } 494 }
495
496 BUG_ON(1);
574 } 497 }
575out: 498
576 btrfs_free_path(path); 499 if (del_nr > 0) {
577 if (locked_end > orig_locked_end) { 500 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
578 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, 501 BUG_ON(ret);
579 locked_end - 1, GFP_NOFS);
580 } 502 }
503
504 btrfs_free_path(path);
581 return ret; 505 return ret;
582} 506}
583 507
584static int extent_mergeable(struct extent_buffer *leaf, int slot, 508static int extent_mergeable(struct extent_buffer *leaf, int slot,
585 u64 objectid, u64 bytenr, u64 *start, u64 *end) 509 u64 objectid, u64 bytenr, u64 orig_offset,
510 u64 *start, u64 *end)
586{ 511{
587 struct btrfs_file_extent_item *fi; 512 struct btrfs_file_extent_item *fi;
588 struct btrfs_key key; 513 struct btrfs_key key;
@@ -598,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
598 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 523 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
599 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || 524 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
600 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || 525 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
526 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
601 btrfs_file_extent_compression(leaf, fi) || 527 btrfs_file_extent_compression(leaf, fi) ||
602 btrfs_file_extent_encryption(leaf, fi) || 528 btrfs_file_extent_encryption(leaf, fi) ||
603 btrfs_file_extent_other_encoding(leaf, fi)) 529 btrfs_file_extent_other_encoding(leaf, fi))
@@ -620,23 +546,24 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
620 * two or three. 546 * two or three.
621 */ 547 */
622int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 548int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
623 struct btrfs_root *root,
624 struct inode *inode, u64 start, u64 end) 549 struct inode *inode, u64 start, u64 end)
625{ 550{
551 struct btrfs_root *root = BTRFS_I(inode)->root;
626 struct extent_buffer *leaf; 552 struct extent_buffer *leaf;
627 struct btrfs_path *path; 553 struct btrfs_path *path;
628 struct btrfs_file_extent_item *fi; 554 struct btrfs_file_extent_item *fi;
629 struct btrfs_key key; 555 struct btrfs_key key;
556 struct btrfs_key new_key;
630 u64 bytenr; 557 u64 bytenr;
631 u64 num_bytes; 558 u64 num_bytes;
632 u64 extent_end; 559 u64 extent_end;
633 u64 orig_offset; 560 u64 orig_offset;
634 u64 other_start; 561 u64 other_start;
635 u64 other_end; 562 u64 other_end;
636 u64 split = start; 563 u64 split;
637 u64 locked_end = end; 564 int del_nr = 0;
638 int extent_type; 565 int del_slot = 0;
639 int split_end = 1; 566 int recow;
640 int ret; 567 int ret;
641 568
642 btrfs_drop_extent_cache(inode, start, end - 1, 0); 569 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +571,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
644 path = btrfs_alloc_path(); 571 path = btrfs_alloc_path();
645 BUG_ON(!path); 572 BUG_ON(!path);
646again: 573again:
574 recow = 0;
575 split = start;
647 key.objectid = inode->i_ino; 576 key.objectid = inode->i_ino;
648 key.type = BTRFS_EXTENT_DATA_KEY; 577 key.type = BTRFS_EXTENT_DATA_KEY;
649 if (split == start) 578 key.offset = split;
650 key.offset = split;
651 else
652 key.offset = split - 1;
653 579
654 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 580 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
655 if (ret > 0 && path->slots[0] > 0) 581 if (ret > 0 && path->slots[0] > 0)
@@ -661,159 +587,156 @@ again:
661 key.type != BTRFS_EXTENT_DATA_KEY); 587 key.type != BTRFS_EXTENT_DATA_KEY);
662 fi = btrfs_item_ptr(leaf, path->slots[0], 588 fi = btrfs_item_ptr(leaf, path->slots[0],
663 struct btrfs_file_extent_item); 589 struct btrfs_file_extent_item);
664 extent_type = btrfs_file_extent_type(leaf, fi); 590 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
665 BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); 591 BTRFS_FILE_EXTENT_PREALLOC);
666 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 592 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
667 BUG_ON(key.offset > start || extent_end < end); 593 BUG_ON(key.offset > start || extent_end < end);
668 594
669 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 595 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
670 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 596 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
671 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 597 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
598 memcpy(&new_key, &key, sizeof(new_key));
672 599
673 if (key.offset == start) 600 if (start == key.offset && end < extent_end) {
674 split = end;
675
676 if (key.offset == start && extent_end == end) {
677 int del_nr = 0;
678 int del_slot = 0;
679 other_start = end;
680 other_end = 0;
681 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
682 bytenr, &other_start, &other_end)) {
683 extent_end = other_end;
684 del_slot = path->slots[0] + 1;
685 del_nr++;
686 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
687 0, root->root_key.objectid,
688 inode->i_ino, orig_offset);
689 BUG_ON(ret);
690 }
691 other_start = 0; 601 other_start = 0;
692 other_end = start; 602 other_end = start;
693 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, 603 if (extent_mergeable(leaf, path->slots[0] - 1,
694 bytenr, &other_start, &other_end)) { 604 inode->i_ino, bytenr, orig_offset,
695 key.offset = other_start; 605 &other_start, &other_end)) {
696 del_slot = path->slots[0]; 606 new_key.offset = end;
697 del_nr++; 607 btrfs_set_item_key_safe(trans, root, path, &new_key);
698 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 608 fi = btrfs_item_ptr(leaf, path->slots[0],
699 0, root->root_key.objectid, 609 struct btrfs_file_extent_item);
700 inode->i_ino, orig_offset); 610 btrfs_set_file_extent_num_bytes(leaf, fi,
701 BUG_ON(ret); 611 extent_end - end);
702 } 612 btrfs_set_file_extent_offset(leaf, fi,
703 split_end = 0; 613 end - orig_offset);
704 if (del_nr == 0) { 614 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
705 btrfs_set_file_extent_type(leaf, fi, 615 struct btrfs_file_extent_item);
706 BTRFS_FILE_EXTENT_REG); 616 btrfs_set_file_extent_num_bytes(leaf, fi,
707 goto done; 617 end - other_start);
708 } 618 btrfs_mark_buffer_dirty(leaf);
709 619 goto out;
710 fi = btrfs_item_ptr(leaf, del_slot - 1,
711 struct btrfs_file_extent_item);
712 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
713 btrfs_set_file_extent_num_bytes(leaf, fi,
714 extent_end - key.offset);
715 btrfs_mark_buffer_dirty(leaf);
716
717 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
718 BUG_ON(ret);
719 goto release;
720 } else if (split == start) {
721 if (locked_end < extent_end) {
722 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
723 locked_end, extent_end - 1, GFP_NOFS);
724 if (!ret) {
725 btrfs_release_path(root, path);
726 lock_extent(&BTRFS_I(inode)->io_tree,
727 locked_end, extent_end - 1, GFP_NOFS);
728 locked_end = extent_end;
729 goto again;
730 }
731 locked_end = extent_end;
732 } 620 }
733 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
734 } else {
735 BUG_ON(key.offset != start);
736 key.offset = split;
737 btrfs_set_file_extent_offset(leaf, fi, key.offset -
738 orig_offset);
739 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
740 btrfs_set_item_key_safe(trans, root, path, &key);
741 extent_end = split;
742 } 621 }
743 622
744 if (extent_end == end) { 623 if (start > key.offset && end == extent_end) {
745 split_end = 0;
746 extent_type = BTRFS_FILE_EXTENT_REG;
747 }
748 if (extent_end == end && split == start) {
749 other_start = end; 624 other_start = end;
750 other_end = 0; 625 other_end = 0;
751 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 626 if (extent_mergeable(leaf, path->slots[0] + 1,
752 bytenr, &other_start, &other_end)) { 627 inode->i_ino, bytenr, orig_offset,
753 path->slots[0]++; 628 &other_start, &other_end)) {
754 fi = btrfs_item_ptr(leaf, path->slots[0], 629 fi = btrfs_item_ptr(leaf, path->slots[0],
755 struct btrfs_file_extent_item); 630 struct btrfs_file_extent_item);
756 key.offset = split;
757 btrfs_set_item_key_safe(trans, root, path, &key);
758 btrfs_set_file_extent_offset(leaf, fi, key.offset -
759 orig_offset);
760 btrfs_set_file_extent_num_bytes(leaf, fi, 631 btrfs_set_file_extent_num_bytes(leaf, fi,
761 other_end - split); 632 start - key.offset);
762 goto done; 633 path->slots[0]++;
763 } 634 new_key.offset = start;
764 } 635 btrfs_set_item_key_safe(trans, root, path, &new_key);
765 if (extent_end == end && split == end) { 636
766 other_start = 0;
767 other_end = start;
768 if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
769 bytenr, &other_start, &other_end)) {
770 path->slots[0]--;
771 fi = btrfs_item_ptr(leaf, path->slots[0], 637 fi = btrfs_item_ptr(leaf, path->slots[0],
772 struct btrfs_file_extent_item); 638 struct btrfs_file_extent_item);
773 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - 639 btrfs_set_file_extent_num_bytes(leaf, fi,
774 other_start); 640 other_end - start);
775 goto done; 641 btrfs_set_file_extent_offset(leaf, fi,
642 start - orig_offset);
643 btrfs_mark_buffer_dirty(leaf);
644 goto out;
776 } 645 }
777 } 646 }
778 647
779 btrfs_mark_buffer_dirty(leaf); 648 while (start > key.offset || end < extent_end) {
649 if (key.offset == start)
650 split = end;
780 651
781 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 652 new_key.offset = split;
782 root->root_key.objectid, 653 ret = btrfs_duplicate_item(trans, root, path, &new_key);
783 inode->i_ino, orig_offset); 654 if (ret == -EAGAIN) {
784 BUG_ON(ret); 655 btrfs_release_path(root, path);
785 btrfs_release_path(root, path); 656 goto again;
657 }
658 BUG_ON(ret < 0);
786 659
787 key.offset = start; 660 leaf = path->nodes[0];
788 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi)); 661 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
789 BUG_ON(ret); 662 struct btrfs_file_extent_item);
663 btrfs_set_file_extent_num_bytes(leaf, fi,
664 split - key.offset);
790 665
791 leaf = path->nodes[0]; 666 fi = btrfs_item_ptr(leaf, path->slots[0],
792 fi = btrfs_item_ptr(leaf, path->slots[0], 667 struct btrfs_file_extent_item);
793 struct btrfs_file_extent_item); 668
794 btrfs_set_file_extent_generation(leaf, fi, trans->transid); 669 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
795 btrfs_set_file_extent_type(leaf, fi, extent_type); 670 btrfs_set_file_extent_num_bytes(leaf, fi,
796 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); 671 extent_end - split);
797 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); 672 btrfs_mark_buffer_dirty(leaf);
798 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset); 673
799 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); 674 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
800 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 675 root->root_key.objectid,
801 btrfs_set_file_extent_compression(leaf, fi, 0); 676 inode->i_ino, orig_offset);
802 btrfs_set_file_extent_encryption(leaf, fi, 0); 677 BUG_ON(ret);
803 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 678
804done: 679 if (split == start) {
805 btrfs_mark_buffer_dirty(leaf); 680 key.offset = start;
806 681 } else {
807release: 682 BUG_ON(start != key.offset);
808 btrfs_release_path(root, path); 683 path->slots[0]--;
809 if (split_end && split == start) { 684 extent_end = end;
810 split = end; 685 }
811 goto again; 686 recow = 1;
812 } 687 }
813 if (locked_end > end) { 688
814 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 689 other_start = end;
815 GFP_NOFS); 690 other_end = 0;
691 if (extent_mergeable(leaf, path->slots[0] + 1,
692 inode->i_ino, bytenr, orig_offset,
693 &other_start, &other_end)) {
694 if (recow) {
695 btrfs_release_path(root, path);
696 goto again;
697 }
698 extent_end = other_end;
699 del_slot = path->slots[0] + 1;
700 del_nr++;
701 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
702 0, root->root_key.objectid,
703 inode->i_ino, orig_offset);
704 BUG_ON(ret);
705 }
706 other_start = 0;
707 other_end = start;
708 if (extent_mergeable(leaf, path->slots[0] - 1,
709 inode->i_ino, bytenr, orig_offset,
710 &other_start, &other_end)) {
711 if (recow) {
712 btrfs_release_path(root, path);
713 goto again;
714 }
715 key.offset = other_start;
716 del_slot = path->slots[0];
717 del_nr++;
718 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
719 0, root->root_key.objectid,
720 inode->i_ino, orig_offset);
721 BUG_ON(ret);
816 } 722 }
723 fi = btrfs_item_ptr(leaf, path->slots[0],
724 struct btrfs_file_extent_item);
725 if (del_nr == 0) {
726 btrfs_set_file_extent_type(leaf, fi,
727 BTRFS_FILE_EXTENT_REG);
728 btrfs_mark_buffer_dirty(leaf);
729 } else {
730 btrfs_set_file_extent_type(leaf, fi,
731 BTRFS_FILE_EXTENT_REG);
732 btrfs_set_file_extent_num_bytes(leaf, fi,
733 extent_end - key.offset);
734 btrfs_mark_buffer_dirty(leaf);
735
736 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
737 BUG_ON(ret);
738 }
739out:
817 btrfs_free_path(path); 740 btrfs_free_path(path);
818 return 0; 741 return 0;
819} 742}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bfc..b330e27c2d8b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 88 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 89 unsigned long *nr_written, int unlock);
90 90
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 91static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
92 struct inode *inode, struct inode *dir)
92{ 93{
93 int err; 94 int err;
94 95
95 err = btrfs_init_acl(inode, dir); 96 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 97 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 98 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 99 return err;
99} 100}
100 101
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 190 btrfs_free_path(path);
190 191
192 /*
193 * we're an inline extent, so nobody can
194 * extend the file past i_size without locking
195 * a page we already have locked.
196 *
197 * We must do any isize and inode updates
198 * before we unlock the pages. Otherwise we
199 * could end up racing with unlink.
200 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 201 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 202 btrfs_update_inode(trans, root, inode);
203
193 return 0; 204 return 0;
194fail: 205fail:
195 btrfs_free_path(path); 206 btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 241 return 1;
231 } 242 }
232 243
233 ret = btrfs_drop_extents(trans, root, inode, start, 244 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 245 &hint_byte, 1);
236 BUG_ON(ret); 246 BUG_ON(ret);
237 247
@@ -416,7 +426,6 @@ again:
416 start, end, 426 start, end,
417 total_compressed, pages); 427 total_compressed, pages);
418 } 428 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 429 if (ret == 0) {
421 /* 430 /*
422 * inline extent creation worked, we don't need 431 * inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 439 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 440 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 442
443 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 444 goto free_pages_out;
435 } 445 }
446 btrfs_end_transaction(trans, root);
436 } 447 }
437 448
438 if (will_compress) { 449 if (will_compress) {
@@ -543,7 +554,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
543 if (list_empty(&async_cow->extents)) 554 if (list_empty(&async_cow->extents))
544 return 0; 555 return 0;
545 556
546 trans = btrfs_join_transaction(root, 1);
547 557
548 while (!list_empty(&async_cow->extents)) { 558 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 559 async_extent = list_entry(async_cow->extents.next,
@@ -590,19 +600,15 @@ retry:
590 lock_extent(io_tree, async_extent->start, 600 lock_extent(io_tree, async_extent->start,
591 async_extent->start + async_extent->ram_size - 1, 601 async_extent->start + async_extent->ram_size - 1,
592 GFP_NOFS); 602 GFP_NOFS);
593 /*
594 * here we're doing allocation and writeback of the
595 * compressed pages
596 */
597 btrfs_drop_extent_cache(inode, async_extent->start,
598 async_extent->start +
599 async_extent->ram_size - 1, 0);
600 603
604 trans = btrfs_join_transaction(root, 1);
601 ret = btrfs_reserve_extent(trans, root, 605 ret = btrfs_reserve_extent(trans, root,
602 async_extent->compressed_size, 606 async_extent->compressed_size,
603 async_extent->compressed_size, 607 async_extent->compressed_size,
604 0, alloc_hint, 608 0, alloc_hint,
605 (u64)-1, &ins, 1); 609 (u64)-1, &ins, 1);
610 btrfs_end_transaction(trans, root);
611
606 if (ret) { 612 if (ret) {
607 int i; 613 int i;
608 for (i = 0; i < async_extent->nr_pages; i++) { 614 for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +624,14 @@ retry:
618 goto retry; 624 goto retry;
619 } 625 }
620 626
627 /*
628 * here we're doing allocation and writeback of the
629 * compressed pages
630 */
631 btrfs_drop_extent_cache(inode, async_extent->start,
632 async_extent->start +
633 async_extent->ram_size - 1, 0);
634
621 em = alloc_extent_map(GFP_NOFS); 635 em = alloc_extent_map(GFP_NOFS);
622 em->start = async_extent->start; 636 em->start = async_extent->start;
623 em->len = async_extent->ram_size; 637 em->len = async_extent->ram_size;
@@ -649,8 +663,6 @@ retry:
649 BTRFS_ORDERED_COMPRESSED); 663 BTRFS_ORDERED_COMPRESSED);
650 BUG_ON(ret); 664 BUG_ON(ret);
651 665
652 btrfs_end_transaction(trans, root);
653
654 /* 666 /*
655 * clear dirty, set writeback and unlock the pages. 667 * clear dirty, set writeback and unlock the pages.
656 */ 668 */
@@ -672,13 +684,11 @@ retry:
672 async_extent->nr_pages); 684 async_extent->nr_pages);
673 685
674 BUG_ON(ret); 686 BUG_ON(ret);
675 trans = btrfs_join_transaction(root, 1);
676 alloc_hint = ins.objectid + ins.offset; 687 alloc_hint = ins.objectid + ins.offset;
677 kfree(async_extent); 688 kfree(async_extent);
678 cond_resched(); 689 cond_resched();
679 } 690 }
680 691
681 btrfs_end_transaction(trans, root);
682 return 0; 692 return 0;
683} 693}
684 694
@@ -742,6 +752,7 @@ static noinline int cow_file_range(struct inode *inode,
742 EXTENT_CLEAR_DIRTY | 752 EXTENT_CLEAR_DIRTY |
743 EXTENT_SET_WRITEBACK | 753 EXTENT_SET_WRITEBACK |
744 EXTENT_END_WRITEBACK); 754 EXTENT_END_WRITEBACK);
755
745 *nr_written = *nr_written + 756 *nr_written = *nr_written +
746 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 757 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
747 *page_started = 1; 758 *page_started = 1;
@@ -1596,7 +1607,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1596 struct inode *inode, u64 file_pos, 1607 struct inode *inode, u64 file_pos,
1597 u64 disk_bytenr, u64 disk_num_bytes, 1608 u64 disk_bytenr, u64 disk_num_bytes,
1598 u64 num_bytes, u64 ram_bytes, 1609 u64 num_bytes, u64 ram_bytes,
1599 u64 locked_end,
1600 u8 compression, u8 encryption, 1610 u8 compression, u8 encryption,
1601 u16 other_encoding, int extent_type) 1611 u16 other_encoding, int extent_type)
1602{ 1612{
@@ -1622,9 +1632,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1622 * the caller is expected to unpin it and allow it to be merged 1632 * the caller is expected to unpin it and allow it to be merged
1623 * with the others. 1633 * with the others.
1624 */ 1634 */
1625 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1635 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1626 file_pos + num_bytes, locked_end, 1636 &hint, 0);
1627 file_pos, &hint, 0);
1628 BUG_ON(ret); 1637 BUG_ON(ret);
1629 1638
1630 ins.objectid = inode->i_ino; 1639 ins.objectid = inode->i_ino;
@@ -1730,23 +1739,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1730 } 1739 }
1731 } 1740 }
1732 1741
1733 trans = btrfs_join_transaction(root, 1);
1734
1735 if (!ordered_extent) 1742 if (!ordered_extent)
1736 ordered_extent = btrfs_lookup_ordered_extent(inode, start); 1743 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1737 BUG_ON(!ordered_extent); 1744 BUG_ON(!ordered_extent);
1738 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) 1745 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1739 goto nocow; 1746 BUG_ON(!list_empty(&ordered_extent->list));
1747 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1748 if (!ret) {
1749 trans = btrfs_join_transaction(root, 1);
1750 ret = btrfs_update_inode(trans, root, inode);
1751 BUG_ON(ret);
1752 btrfs_end_transaction(trans, root);
1753 }
1754 goto out;
1755 }
1740 1756
1741 lock_extent(io_tree, ordered_extent->file_offset, 1757 lock_extent(io_tree, ordered_extent->file_offset,
1742 ordered_extent->file_offset + ordered_extent->len - 1, 1758 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS); 1759 GFP_NOFS);
1744 1760
1761 trans = btrfs_join_transaction(root, 1);
1762
1745 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1763 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1746 compressed = 1; 1764 compressed = 1;
1747 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1765 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1748 BUG_ON(compressed); 1766 BUG_ON(compressed);
1749 ret = btrfs_mark_extent_written(trans, root, inode, 1767 ret = btrfs_mark_extent_written(trans, inode,
1750 ordered_extent->file_offset, 1768 ordered_extent->file_offset,
1751 ordered_extent->file_offset + 1769 ordered_extent->file_offset +
1752 ordered_extent->len); 1770 ordered_extent->len);
@@ -1758,8 +1776,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1758 ordered_extent->disk_len, 1776 ordered_extent->disk_len,
1759 ordered_extent->len, 1777 ordered_extent->len,
1760 ordered_extent->len, 1778 ordered_extent->len,
1761 ordered_extent->file_offset +
1762 ordered_extent->len,
1763 compressed, 0, 0, 1779 compressed, 0, 0,
1764 BTRFS_FILE_EXTENT_REG); 1780 BTRFS_FILE_EXTENT_REG);
1765 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1781 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1770,22 +1786,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1770 unlock_extent(io_tree, ordered_extent->file_offset, 1786 unlock_extent(io_tree, ordered_extent->file_offset,
1771 ordered_extent->file_offset + ordered_extent->len - 1, 1787 ordered_extent->file_offset + ordered_extent->len - 1,
1772 GFP_NOFS); 1788 GFP_NOFS);
1773nocow:
1774 add_pending_csums(trans, inode, ordered_extent->file_offset, 1789 add_pending_csums(trans, inode, ordered_extent->file_offset,
1775 &ordered_extent->list); 1790 &ordered_extent->list);
1776 1791
1777 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1792 /* this also removes the ordered extent from the tree */
1778 btrfs_ordered_update_i_size(inode, ordered_extent); 1793 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1779 btrfs_update_inode(trans, root, inode); 1794 ret = btrfs_update_inode(trans, root, inode);
1780 btrfs_remove_ordered_extent(inode, ordered_extent); 1795 BUG_ON(ret);
1781 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1796 btrfs_end_transaction(trans, root);
1782 1797out:
1783 /* once for us */ 1798 /* once for us */
1784 btrfs_put_ordered_extent(ordered_extent); 1799 btrfs_put_ordered_extent(ordered_extent);
1785 /* once for the tree */ 1800 /* once for the tree */
1786 btrfs_put_ordered_extent(ordered_extent); 1801 btrfs_put_ordered_extent(ordered_extent);
1787 1802
1788 btrfs_end_transaction(trans, root);
1789 return 0; 1803 return 0;
1790} 1804}
1791 1805
@@ -2008,6 +2022,54 @@ zeroit:
2008 return -EIO; 2022 return -EIO;
2009} 2023}
2010 2024
2025struct delayed_iput {
2026 struct list_head list;
2027 struct inode *inode;
2028};
2029
2030void btrfs_add_delayed_iput(struct inode *inode)
2031{
2032 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2033 struct delayed_iput *delayed;
2034
2035 if (atomic_add_unless(&inode->i_count, -1, 1))
2036 return;
2037
2038 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
2039 delayed->inode = inode;
2040
2041 spin_lock(&fs_info->delayed_iput_lock);
2042 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2043 spin_unlock(&fs_info->delayed_iput_lock);
2044}
2045
2046void btrfs_run_delayed_iputs(struct btrfs_root *root)
2047{
2048 LIST_HEAD(list);
2049 struct btrfs_fs_info *fs_info = root->fs_info;
2050 struct delayed_iput *delayed;
2051 int empty;
2052
2053 spin_lock(&fs_info->delayed_iput_lock);
2054 empty = list_empty(&fs_info->delayed_iputs);
2055 spin_unlock(&fs_info->delayed_iput_lock);
2056 if (empty)
2057 return;
2058
2059 down_read(&root->fs_info->cleanup_work_sem);
2060 spin_lock(&fs_info->delayed_iput_lock);
2061 list_splice_init(&fs_info->delayed_iputs, &list);
2062 spin_unlock(&fs_info->delayed_iput_lock);
2063
2064 while (!list_empty(&list)) {
2065 delayed = list_entry(list.next, struct delayed_iput, list);
2066 list_del(&delayed->list);
2067 iput(delayed->inode);
2068 kfree(delayed);
2069 }
2070 up_read(&root->fs_info->cleanup_work_sem);
2071}
2072
2011/* 2073/*
2012 * This creates an orphan entry for the given inode in case something goes 2074 * This creates an orphan entry for the given inode in case something goes
2013 * wrong in the middle of an unlink/truncate. 2075 * wrong in the middle of an unlink/truncate.
@@ -2080,16 +2142,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2080 struct inode *inode; 2142 struct inode *inode;
2081 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2143 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2082 2144
2083 path = btrfs_alloc_path(); 2145 if (!xchg(&root->clean_orphans, 0))
2084 if (!path)
2085 return; 2146 return;
2147
2148 path = btrfs_alloc_path();
2149 BUG_ON(!path);
2086 path->reada = -1; 2150 path->reada = -1;
2087 2151
2088 key.objectid = BTRFS_ORPHAN_OBJECTID; 2152 key.objectid = BTRFS_ORPHAN_OBJECTID;
2089 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2153 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2090 key.offset = (u64)-1; 2154 key.offset = (u64)-1;
2091 2155
2092
2093 while (1) { 2156 while (1) {
2094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2157 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 if (ret < 0) { 2158 if (ret < 0) {
@@ -2834,37 +2897,40 @@ out:
2834 * min_type is the minimum key type to truncate down to. If set to 0, this 2897 * min_type is the minimum key type to truncate down to. If set to 0, this
2835 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2898 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2836 */ 2899 */
2837noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2900int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root, 2901 struct btrfs_root *root,
2839 struct inode *inode, 2902 struct inode *inode,
2840 u64 new_size, u32 min_type) 2903 u64 new_size, u32 min_type)
2841{ 2904{
2842 int ret;
2843 struct btrfs_path *path; 2905 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct btrfs_key found_key;
2846 u32 found_type = (u8)-1;
2847 struct extent_buffer *leaf; 2906 struct extent_buffer *leaf;
2848 struct btrfs_file_extent_item *fi; 2907 struct btrfs_file_extent_item *fi;
2908 struct btrfs_key key;
2909 struct btrfs_key found_key;
2849 u64 extent_start = 0; 2910 u64 extent_start = 0;
2850 u64 extent_num_bytes = 0; 2911 u64 extent_num_bytes = 0;
2851 u64 extent_offset = 0; 2912 u64 extent_offset = 0;
2852 u64 item_end = 0; 2913 u64 item_end = 0;
2914 u64 mask = root->sectorsize - 1;
2915 u32 found_type = (u8)-1;
2853 int found_extent; 2916 int found_extent;
2854 int del_item; 2917 int del_item;
2855 int pending_del_nr = 0; 2918 int pending_del_nr = 0;
2856 int pending_del_slot = 0; 2919 int pending_del_slot = 0;
2857 int extent_type = -1; 2920 int extent_type = -1;
2858 int encoding; 2921 int encoding;
2859 u64 mask = root->sectorsize - 1; 2922 int ret;
2923 int err = 0;
2924
2925 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2860 2926
2861 if (root->ref_cows) 2927 if (root->ref_cows)
2862 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2928 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2929
2863 path = btrfs_alloc_path(); 2930 path = btrfs_alloc_path();
2864 BUG_ON(!path); 2931 BUG_ON(!path);
2865 path->reada = -1; 2932 path->reada = -1;
2866 2933
2867 /* FIXME, add redo link to tree so we don't leak on crash */
2868 key.objectid = inode->i_ino; 2934 key.objectid = inode->i_ino;
2869 key.offset = (u64)-1; 2935 key.offset = (u64)-1;
2870 key.type = (u8)-1; 2936 key.type = (u8)-1;
@@ -2872,17 +2938,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2872search_again: 2938search_again:
2873 path->leave_spinning = 1; 2939 path->leave_spinning = 1;
2874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2940 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2875 if (ret < 0) 2941 if (ret < 0) {
2876 goto error; 2942 err = ret;
2943 goto out;
2944 }
2877 2945
2878 if (ret > 0) { 2946 if (ret > 0) {
2879 /* there are no items in the tree for us to truncate, we're 2947 /* there are no items in the tree for us to truncate, we're
2880 * done 2948 * done
2881 */ 2949 */
2882 if (path->slots[0] == 0) { 2950 if (path->slots[0] == 0)
2883 ret = 0; 2951 goto out;
2884 goto error;
2885 }
2886 path->slots[0]--; 2952 path->slots[0]--;
2887 } 2953 }
2888 2954
@@ -2917,28 +2983,17 @@ search_again:
2917 } 2983 }
2918 item_end--; 2984 item_end--;
2919 } 2985 }
2920 if (item_end < new_size) { 2986 if (found_type > min_type) {
2921 if (found_type == BTRFS_DIR_ITEM_KEY) 2987 del_item = 1;
2922 found_type = BTRFS_INODE_ITEM_KEY; 2988 } else {
2923 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2989 if (item_end < new_size)
2924 found_type = BTRFS_EXTENT_DATA_KEY;
2925 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2926 found_type = BTRFS_XATTR_ITEM_KEY;
2927 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2928 found_type = BTRFS_INODE_REF_KEY;
2929 else if (found_type)
2930 found_type--;
2931 else
2932 break; 2990 break;
2933 btrfs_set_key_type(&key, found_type); 2991 if (found_key.offset >= new_size)
2934 goto next; 2992 del_item = 1;
2993 else
2994 del_item = 0;
2935 } 2995 }
2936 if (found_key.offset >= new_size)
2937 del_item = 1;
2938 else
2939 del_item = 0;
2940 found_extent = 0; 2996 found_extent = 0;
2941
2942 /* FIXME, shrink the extent if the ref count is only 1 */ 2997 /* FIXME, shrink the extent if the ref count is only 1 */
2943 if (found_type != BTRFS_EXTENT_DATA_KEY) 2998 if (found_type != BTRFS_EXTENT_DATA_KEY)
2944 goto delete; 2999 goto delete;
@@ -3025,42 +3080,36 @@ delete:
3025 inode->i_ino, extent_offset); 3080 inode->i_ino, extent_offset);
3026 BUG_ON(ret); 3081 BUG_ON(ret);
3027 } 3082 }
3028next:
3029 if (path->slots[0] == 0) {
3030 if (pending_del_nr)
3031 goto del_pending;
3032 btrfs_release_path(root, path);
3033 if (found_type == BTRFS_INODE_ITEM_KEY)
3034 break;
3035 goto search_again;
3036 }
3037 3083
3038 path->slots[0]--; 3084 if (found_type == BTRFS_INODE_ITEM_KEY)
3039 if (pending_del_nr && 3085 break;
3040 path->slots[0] + 1 != pending_del_slot) { 3086
3041 struct btrfs_key debug; 3087 if (path->slots[0] == 0 ||
3042del_pending: 3088 path->slots[0] != pending_del_slot) {
3043 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3089 if (root->ref_cows) {
3044 pending_del_slot); 3090 err = -EAGAIN;
3045 ret = btrfs_del_items(trans, root, path, 3091 goto out;
3046 pending_del_slot, 3092 }
3047 pending_del_nr); 3093 if (pending_del_nr) {
3048 BUG_ON(ret); 3094 ret = btrfs_del_items(trans, root, path,
3049 pending_del_nr = 0; 3095 pending_del_slot,
3096 pending_del_nr);
3097 BUG_ON(ret);
3098 pending_del_nr = 0;
3099 }
3050 btrfs_release_path(root, path); 3100 btrfs_release_path(root, path);
3051 if (found_type == BTRFS_INODE_ITEM_KEY)
3052 break;
3053 goto search_again; 3101 goto search_again;
3102 } else {
3103 path->slots[0]--;
3054 } 3104 }
3055 } 3105 }
3056 ret = 0; 3106out:
3057error:
3058 if (pending_del_nr) { 3107 if (pending_del_nr) {
3059 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3108 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3060 pending_del_nr); 3109 pending_del_nr);
3061 } 3110 }
3062 btrfs_free_path(path); 3111 btrfs_free_path(path);
3063 return ret; 3112 return err;
3064} 3113}
3065 3114
3066/* 3115/*
@@ -3180,10 +3229,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3180 if (size <= hole_start) 3229 if (size <= hole_start)
3181 return 0; 3230 return 0;
3182 3231
3183 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3184 if (err)
3185 return err;
3186
3187 while (1) { 3232 while (1) {
3188 struct btrfs_ordered_extent *ordered; 3233 struct btrfs_ordered_extent *ordered;
3189 btrfs_wait_ordered_range(inode, hole_start, 3234 btrfs_wait_ordered_range(inode, hole_start,
@@ -3196,9 +3241,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3196 btrfs_put_ordered_extent(ordered); 3241 btrfs_put_ordered_extent(ordered);
3197 } 3242 }
3198 3243
3199 trans = btrfs_start_transaction(root, 1);
3200 btrfs_set_trans_block_group(trans, inode);
3201
3202 cur_offset = hole_start; 3244 cur_offset = hole_start;
3203 while (1) { 3245 while (1) {
3204 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3246 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3248,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3206 BUG_ON(IS_ERR(em) || !em); 3248 BUG_ON(IS_ERR(em) || !em);
3207 last_byte = min(extent_map_end(em), block_end); 3249 last_byte = min(extent_map_end(em), block_end);
3208 last_byte = (last_byte + mask) & ~mask; 3250 last_byte = (last_byte + mask) & ~mask;
3209 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3251 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3210 u64 hint_byte = 0; 3252 u64 hint_byte = 0;
3211 hole_size = last_byte - cur_offset; 3253 hole_size = last_byte - cur_offset;
3212 err = btrfs_drop_extents(trans, root, inode,
3213 cur_offset,
3214 cur_offset + hole_size,
3215 block_end,
3216 cur_offset, &hint_byte, 1);
3217 if (err)
3218 break;
3219 3254
3220 err = btrfs_reserve_metadata_space(root, 1); 3255 err = btrfs_reserve_metadata_space(root, 2);
3221 if (err) 3256 if (err)
3222 break; 3257 break;
3223 3258
3259 trans = btrfs_start_transaction(root, 1);
3260 btrfs_set_trans_block_group(trans, inode);
3261
3262 err = btrfs_drop_extents(trans, inode, cur_offset,
3263 cur_offset + hole_size,
3264 &hint_byte, 1);
3265 BUG_ON(err);
3266
3224 err = btrfs_insert_file_extent(trans, root, 3267 err = btrfs_insert_file_extent(trans, root,
3225 inode->i_ino, cur_offset, 0, 3268 inode->i_ino, cur_offset, 0,
3226 0, hole_size, 0, hole_size, 3269 0, hole_size, 0, hole_size,
3227 0, 0, 0); 3270 0, 0, 0);
3271 BUG_ON(err);
3272
3228 btrfs_drop_extent_cache(inode, hole_start, 3273 btrfs_drop_extent_cache(inode, hole_start,
3229 last_byte - 1, 0); 3274 last_byte - 1, 0);
3230 btrfs_unreserve_metadata_space(root, 1); 3275
3276 btrfs_end_transaction(trans, root);
3277 btrfs_unreserve_metadata_space(root, 2);
3231 } 3278 }
3232 free_extent_map(em); 3279 free_extent_map(em);
3233 cur_offset = last_byte; 3280 cur_offset = last_byte;
3234 if (err || cur_offset >= block_end) 3281 if (cur_offset >= block_end)
3235 break; 3282 break;
3236 } 3283 }
3237 3284
3238 btrfs_end_transaction(trans, root);
3239 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3285 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3240 return err; 3286 return err;
3241} 3287}
3242 3288
3289static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3290{
3291 struct btrfs_root *root = BTRFS_I(inode)->root;
3292 struct btrfs_trans_handle *trans;
3293 unsigned long nr;
3294 int ret;
3295
3296 if (attr->ia_size == inode->i_size)
3297 return 0;
3298
3299 if (attr->ia_size > inode->i_size) {
3300 unsigned long limit;
3301 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3302 if (attr->ia_size > inode->i_sb->s_maxbytes)
3303 return -EFBIG;
3304 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3305 send_sig(SIGXFSZ, current, 0);
3306 return -EFBIG;
3307 }
3308 }
3309
3310 ret = btrfs_reserve_metadata_space(root, 1);
3311 if (ret)
3312 return ret;
3313
3314 trans = btrfs_start_transaction(root, 1);
3315 btrfs_set_trans_block_group(trans, inode);
3316
3317 ret = btrfs_orphan_add(trans, inode);
3318 BUG_ON(ret);
3319
3320 nr = trans->blocks_used;
3321 btrfs_end_transaction(trans, root);
3322 btrfs_unreserve_metadata_space(root, 1);
3323 btrfs_btree_balance_dirty(root, nr);
3324
3325 if (attr->ia_size > inode->i_size) {
3326 ret = btrfs_cont_expand(inode, attr->ia_size);
3327 if (ret) {
3328 btrfs_truncate(inode);
3329 return ret;
3330 }
3331
3332 i_size_write(inode, attr->ia_size);
3333 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3334
3335 trans = btrfs_start_transaction(root, 1);
3336 btrfs_set_trans_block_group(trans, inode);
3337
3338 ret = btrfs_update_inode(trans, root, inode);
3339 BUG_ON(ret);
3340 if (inode->i_nlink > 0) {
3341 ret = btrfs_orphan_del(trans, inode);
3342 BUG_ON(ret);
3343 }
3344 nr = trans->blocks_used;
3345 btrfs_end_transaction(trans, root);
3346 btrfs_btree_balance_dirty(root, nr);
3347 return 0;
3348 }
3349
3350 /*
3351 * We're truncating a file that used to have good data down to
3352 * zero. Make sure it gets into the ordered flush list so that
3353 * any new writes get down to disk quickly.
3354 */
3355 if (attr->ia_size == 0)
3356 BTRFS_I(inode)->ordered_data_close = 1;
3357
3358 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3359 ret = vmtruncate(inode, attr->ia_size);
3360 BUG_ON(ret);
3361
3362 return 0;
3363}
3364
3243static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3365static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3244{ 3366{
3245 struct inode *inode = dentry->d_inode; 3367 struct inode *inode = dentry->d_inode;
@@ -3250,23 +3372,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3250 return err; 3372 return err;
3251 3373
3252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3374 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3253 if (attr->ia_size > inode->i_size) { 3375 err = btrfs_setattr_size(inode, attr);
3254 err = btrfs_cont_expand(inode, attr->ia_size); 3376 if (err)
3255 if (err) 3377 return err;
3256 return err;
3257 } else if (inode->i_size > 0 &&
3258 attr->ia_size == 0) {
3259
3260 /* we're truncating a file that used to have good
3261 * data down to zero. Make sure it gets into
3262 * the ordered flush list so that any new writes
3263 * get down to disk quickly.
3264 */
3265 BTRFS_I(inode)->ordered_data_close = 1;
3266 }
3267 } 3378 }
3379 attr->ia_valid &= ~ATTR_SIZE;
3268 3380
3269 err = inode_setattr(inode, attr); 3381 if (attr->ia_valid)
3382 err = inode_setattr(inode, attr);
3270 3383
3271 if (!err && ((attr->ia_valid & ATTR_MODE))) 3384 if (!err && ((attr->ia_valid & ATTR_MODE)))
3272 err = btrfs_acl_chmod(inode); 3385 err = btrfs_acl_chmod(inode);
@@ -3287,36 +3400,43 @@ void btrfs_delete_inode(struct inode *inode)
3287 } 3400 }
3288 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3401 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3289 3402
3403 if (root->fs_info->log_root_recovering) {
3404 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3405 goto no_delete;
3406 }
3407
3290 if (inode->i_nlink > 0) { 3408 if (inode->i_nlink > 0) {
3291 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3409 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3292 goto no_delete; 3410 goto no_delete;
3293 } 3411 }
3294 3412
3295 btrfs_i_size_write(inode, 0); 3413 btrfs_i_size_write(inode, 0);
3296 trans = btrfs_join_transaction(root, 1);
3297 3414
3298 btrfs_set_trans_block_group(trans, inode); 3415 while (1) {
3299 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3416 trans = btrfs_start_transaction(root, 1);
3300 if (ret) { 3417 btrfs_set_trans_block_group(trans, inode);
3301 btrfs_orphan_del(NULL, inode); 3418 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3302 goto no_delete_lock;
3303 }
3304 3419
3305 btrfs_orphan_del(trans, inode); 3420 if (ret != -EAGAIN)
3421 break;
3306 3422
3307 nr = trans->blocks_used; 3423 nr = trans->blocks_used;
3308 clear_inode(inode); 3424 btrfs_end_transaction(trans, root);
3425 trans = NULL;
3426 btrfs_btree_balance_dirty(root, nr);
3427 }
3309 3428
3310 btrfs_end_transaction(trans, root); 3429 if (ret == 0) {
3311 btrfs_btree_balance_dirty(root, nr); 3430 ret = btrfs_orphan_del(trans, inode);
3312 return; 3431 BUG_ON(ret);
3432 }
3313 3433
3314no_delete_lock:
3315 nr = trans->blocks_used; 3434 nr = trans->blocks_used;
3316 btrfs_end_transaction(trans, root); 3435 btrfs_end_transaction(trans, root);
3317 btrfs_btree_balance_dirty(root, nr); 3436 btrfs_btree_balance_dirty(root, nr);
3318no_delete: 3437no_delete:
3319 clear_inode(inode); 3438 clear_inode(inode);
3439 return;
3320} 3440}
3321 3441
3322/* 3442/*
@@ -3569,7 +3689,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3569 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3689 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3570 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3690 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3571 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3691 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3572 mutex_init(&BTRFS_I(inode)->extent_mutex);
3573 mutex_init(&BTRFS_I(inode)->log_mutex); 3692 mutex_init(&BTRFS_I(inode)->log_mutex);
3574} 3693}
3575 3694
@@ -3677,6 +3796,12 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3677 3796
3678 if (location.type == BTRFS_INODE_ITEM_KEY) { 3797 if (location.type == BTRFS_INODE_ITEM_KEY) {
3679 inode = btrfs_iget(dir->i_sb, &location, root); 3798 inode = btrfs_iget(dir->i_sb, &location, root);
3799 if (unlikely(root->clean_orphans) &&
3800 !(inode->i_sb->s_flags & MS_RDONLY)) {
3801 down_read(&root->fs_info->cleanup_work_sem);
3802 btrfs_orphan_cleanup(root);
3803 up_read(&root->fs_info->cleanup_work_sem);
3804 }
3680 return inode; 3805 return inode;
3681 } 3806 }
3682 3807
@@ -3695,6 +3820,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3695 } 3820 }
3696 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3821 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3697 3822
3823 if (root != sub_root) {
3824 down_read(&root->fs_info->cleanup_work_sem);
3825 if (!(inode->i_sb->s_flags & MS_RDONLY))
3826 btrfs_orphan_cleanup(sub_root);
3827 up_read(&root->fs_info->cleanup_work_sem);
3828 }
3829
3698 return inode; 3830 return inode;
3699} 3831}
3700 3832
@@ -3869,7 +4001,11 @@ skip:
3869 4001
3870 /* Reached end of directory/root. Bump pos past the last item. */ 4002 /* Reached end of directory/root. Bump pos past the last item. */
3871 if (key_type == BTRFS_DIR_INDEX_KEY) 4003 if (key_type == BTRFS_DIR_INDEX_KEY)
3872 filp->f_pos = INT_LIMIT(off_t); 4004 /*
4005 * 32-bit glibc will use getdents64, but then strtol -
4006 * so the last number we can serve is this.
4007 */
4008 filp->f_pos = 0x7fffffff;
3873 else 4009 else
3874 filp->f_pos++; 4010 filp->f_pos++;
3875nopos: 4011nopos:
@@ -4219,7 +4355,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4219 if (IS_ERR(inode)) 4355 if (IS_ERR(inode))
4220 goto out_unlock; 4356 goto out_unlock;
4221 4357
4222 err = btrfs_init_inode_security(inode, dir); 4358 err = btrfs_init_inode_security(trans, inode, dir);
4223 if (err) { 4359 if (err) {
4224 drop_inode = 1; 4360 drop_inode = 1;
4225 goto out_unlock; 4361 goto out_unlock;
@@ -4290,7 +4426,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4290 if (IS_ERR(inode)) 4426 if (IS_ERR(inode))
4291 goto out_unlock; 4427 goto out_unlock;
4292 4428
4293 err = btrfs_init_inode_security(inode, dir); 4429 err = btrfs_init_inode_security(trans, inode, dir);
4294 if (err) { 4430 if (err) {
4295 drop_inode = 1; 4431 drop_inode = 1;
4296 goto out_unlock; 4432 goto out_unlock;
@@ -4336,6 +4472,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4336 if (inode->i_nlink == 0) 4472 if (inode->i_nlink == 0)
4337 return -ENOENT; 4473 return -ENOENT;
4338 4474
4475 /* do not allow sys_link's with other subvols of the same device */
4476 if (root->objectid != BTRFS_I(inode)->root->objectid)
4477 return -EPERM;
4478
4339 /* 4479 /*
4340 * 1 item for inode ref 4480 * 1 item for inode ref
4341 * 2 items for dir items 4481 * 2 items for dir items
@@ -4423,7 +4563,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4423 4563
4424 drop_on_err = 1; 4564 drop_on_err = 1;
4425 4565
4426 err = btrfs_init_inode_security(inode, dir); 4566 err = btrfs_init_inode_security(trans, inode, dir);
4427 if (err) 4567 if (err)
4428 goto out_fail; 4568 goto out_fail;
4429 4569
@@ -5074,17 +5214,20 @@ static void btrfs_truncate(struct inode *inode)
5074 unsigned long nr; 5214 unsigned long nr;
5075 u64 mask = root->sectorsize - 1; 5215 u64 mask = root->sectorsize - 1;
5076 5216
5077 if (!S_ISREG(inode->i_mode)) 5217 if (!S_ISREG(inode->i_mode)) {
5078 return; 5218 WARN_ON(1);
5079 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5080 return; 5219 return;
5220 }
5081 5221
5082 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5222 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5083 if (ret) 5223 if (ret)
5084 return; 5224 return;
5225
5085 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5226 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5227 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5086 5228
5087 trans = btrfs_start_transaction(root, 1); 5229 trans = btrfs_start_transaction(root, 1);
5230 btrfs_set_trans_block_group(trans, inode);
5088 5231
5089 /* 5232 /*
5090 * setattr is responsible for setting the ordered_data_close flag, 5233 * setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5249,32 @@ static void btrfs_truncate(struct inode *inode)
5106 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5249 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5107 btrfs_add_ordered_operation(trans, root, inode); 5250 btrfs_add_ordered_operation(trans, root, inode);
5108 5251
5109 btrfs_set_trans_block_group(trans, inode); 5252 while (1) {
5110 btrfs_i_size_write(inode, inode->i_size); 5253 ret = btrfs_truncate_inode_items(trans, root, inode,
5254 inode->i_size,
5255 BTRFS_EXTENT_DATA_KEY);
5256 if (ret != -EAGAIN)
5257 break;
5111 5258
5112 ret = btrfs_orphan_add(trans, inode); 5259 ret = btrfs_update_inode(trans, root, inode);
5113 if (ret) 5260 BUG_ON(ret);
5114 goto out; 5261
5115 /* FIXME, add redo link to tree so we don't leak on crash */ 5262 nr = trans->blocks_used;
5116 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5263 btrfs_end_transaction(trans, root);
5117 BTRFS_EXTENT_DATA_KEY); 5264 btrfs_btree_balance_dirty(root, nr);
5118 btrfs_update_inode(trans, root, inode); 5265
5266 trans = btrfs_start_transaction(root, 1);
5267 btrfs_set_trans_block_group(trans, inode);
5268 }
5119 5269
5120 ret = btrfs_orphan_del(trans, inode); 5270 if (ret == 0 && inode->i_nlink > 0) {
5271 ret = btrfs_orphan_del(trans, inode);
5272 BUG_ON(ret);
5273 }
5274
5275 ret = btrfs_update_inode(trans, root, inode);
5121 BUG_ON(ret); 5276 BUG_ON(ret);
5122 5277
5123out:
5124 nr = trans->blocks_used; 5278 nr = trans->blocks_used;
5125 ret = btrfs_end_transaction_throttle(trans, root); 5279 ret = btrfs_end_transaction_throttle(trans, root);
5126 BUG_ON(ret); 5280 BUG_ON(ret);
@@ -5217,9 +5371,9 @@ void btrfs_destroy_inode(struct inode *inode)
5217 5371
5218 spin_lock(&root->list_lock); 5372 spin_lock(&root->list_lock);
5219 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5373 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5220 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5374 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5221 " list\n", inode->i_ino); 5375 inode->i_ino);
5222 dump_stack(); 5376 list_del_init(&BTRFS_I(inode)->i_orphan);
5223 } 5377 }
5224 spin_unlock(&root->list_lock); 5378 spin_unlock(&root->list_lock);
5225 5379
@@ -5476,7 +5630,7 @@ out_fail:
5476 * some fairly slow code that needs optimization. This walks the list 5630 * some fairly slow code that needs optimization. This walks the list
5477 * of all the inodes with pending delalloc and forces them to disk. 5631 * of all the inodes with pending delalloc and forces them to disk.
5478 */ 5632 */
5479int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5633int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5480{ 5634{
5481 struct list_head *head = &root->fs_info->delalloc_inodes; 5635 struct list_head *head = &root->fs_info->delalloc_inodes;
5482 struct btrfs_inode *binode; 5636 struct btrfs_inode *binode;
@@ -5495,7 +5649,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5495 spin_unlock(&root->fs_info->delalloc_lock); 5649 spin_unlock(&root->fs_info->delalloc_lock);
5496 if (inode) { 5650 if (inode) {
5497 filemap_flush(inode->i_mapping); 5651 filemap_flush(inode->i_mapping);
5498 iput(inode); 5652 if (delay_iput)
5653 btrfs_add_delayed_iput(inode);
5654 else
5655 iput(inode);
5499 } 5656 }
5500 cond_resched(); 5657 cond_resched();
5501 spin_lock(&root->fs_info->delalloc_lock); 5658 spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5726,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5569 if (IS_ERR(inode)) 5726 if (IS_ERR(inode))
5570 goto out_unlock; 5727 goto out_unlock;
5571 5728
5572 err = btrfs_init_inode_security(inode, dir); 5729 err = btrfs_init_inode_security(trans, inode, dir);
5573 if (err) { 5730 if (err) {
5574 drop_inode = 1; 5731 drop_inode = 1;
5575 goto out_unlock; 5732 goto out_unlock;
@@ -5641,10 +5798,10 @@ out_fail:
5641 return err; 5798 return err;
5642} 5799}
5643 5800
5644static int prealloc_file_range(struct btrfs_trans_handle *trans, 5801static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5645 struct inode *inode, u64 start, u64 end, 5802 u64 alloc_hint, int mode)
5646 u64 locked_end, u64 alloc_hint, int mode)
5647{ 5803{
5804 struct btrfs_trans_handle *trans;
5648 struct btrfs_root *root = BTRFS_I(inode)->root; 5805 struct btrfs_root *root = BTRFS_I(inode)->root;
5649 struct btrfs_key ins; 5806 struct btrfs_key ins;
5650 u64 alloc_size; 5807 u64 alloc_size;
@@ -5655,43 +5812,56 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5655 while (num_bytes > 0) { 5812 while (num_bytes > 0) {
5656 alloc_size = min(num_bytes, root->fs_info->max_extent); 5813 alloc_size = min(num_bytes, root->fs_info->max_extent);
5657 5814
5658 ret = btrfs_reserve_metadata_space(root, 1); 5815 trans = btrfs_start_transaction(root, 1);
5659 if (ret)
5660 goto out;
5661 5816
5662 ret = btrfs_reserve_extent(trans, root, alloc_size, 5817 ret = btrfs_reserve_extent(trans, root, alloc_size,
5663 root->sectorsize, 0, alloc_hint, 5818 root->sectorsize, 0, alloc_hint,
5664 (u64)-1, &ins, 1); 5819 (u64)-1, &ins, 1);
5665 if (ret) { 5820 if (ret) {
5666 WARN_ON(1); 5821 WARN_ON(1);
5667 goto out; 5822 goto stop_trans;
5823 }
5824
5825 ret = btrfs_reserve_metadata_space(root, 3);
5826 if (ret) {
5827 btrfs_free_reserved_extent(root, ins.objectid,
5828 ins.offset);
5829 goto stop_trans;
5668 } 5830 }
5831
5669 ret = insert_reserved_file_extent(trans, inode, 5832 ret = insert_reserved_file_extent(trans, inode,
5670 cur_offset, ins.objectid, 5833 cur_offset, ins.objectid,
5671 ins.offset, ins.offset, 5834 ins.offset, ins.offset,
5672 ins.offset, locked_end, 5835 ins.offset, 0, 0, 0,
5673 0, 0, 0,
5674 BTRFS_FILE_EXTENT_PREALLOC); 5836 BTRFS_FILE_EXTENT_PREALLOC);
5675 BUG_ON(ret); 5837 BUG_ON(ret);
5676 btrfs_drop_extent_cache(inode, cur_offset, 5838 btrfs_drop_extent_cache(inode, cur_offset,
5677 cur_offset + ins.offset -1, 0); 5839 cur_offset + ins.offset -1, 0);
5840
5678 num_bytes -= ins.offset; 5841 num_bytes -= ins.offset;
5679 cur_offset += ins.offset; 5842 cur_offset += ins.offset;
5680 alloc_hint = ins.objectid + ins.offset; 5843 alloc_hint = ins.objectid + ins.offset;
5681 btrfs_unreserve_metadata_space(root, 1); 5844
5682 }
5683out:
5684 if (cur_offset > start) {
5685 inode->i_ctime = CURRENT_TIME; 5845 inode->i_ctime = CURRENT_TIME;
5686 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5846 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5687 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5847 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5688 cur_offset > i_size_read(inode)) 5848 cur_offset > inode->i_size) {
5689 btrfs_i_size_write(inode, cur_offset); 5849 i_size_write(inode, cur_offset);
5850 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
5851 }
5852
5690 ret = btrfs_update_inode(trans, root, inode); 5853 ret = btrfs_update_inode(trans, root, inode);
5691 BUG_ON(ret); 5854 BUG_ON(ret);
5855
5856 btrfs_end_transaction(trans, root);
5857 btrfs_unreserve_metadata_space(root, 3);
5692 } 5858 }
5859 return ret;
5693 5860
5861stop_trans:
5862 btrfs_end_transaction(trans, root);
5694 return ret; 5863 return ret;
5864
5695} 5865}
5696 5866
5697static long btrfs_fallocate(struct inode *inode, int mode, 5867static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5705,8 +5875,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5705 u64 locked_end; 5875 u64 locked_end;
5706 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5876 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5707 struct extent_map *em; 5877 struct extent_map *em;
5708 struct btrfs_trans_handle *trans;
5709 struct btrfs_root *root;
5710 int ret; 5878 int ret;
5711 5879
5712 alloc_start = offset & ~mask; 5880 alloc_start = offset & ~mask;
@@ -5725,9 +5893,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5725 goto out; 5893 goto out;
5726 } 5894 }
5727 5895
5728 root = BTRFS_I(inode)->root; 5896 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5729
5730 ret = btrfs_check_data_free_space(root, inode,
5731 alloc_end - alloc_start); 5897 alloc_end - alloc_start);
5732 if (ret) 5898 if (ret)
5733 goto out; 5899 goto out;
@@ -5736,12 +5902,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5736 while (1) { 5902 while (1) {
5737 struct btrfs_ordered_extent *ordered; 5903 struct btrfs_ordered_extent *ordered;
5738 5904
5739 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5740 if (!trans) {
5741 ret = -EIO;
5742 goto out_free;
5743 }
5744
5745 /* the extent lock is ordered inside the running 5905 /* the extent lock is ordered inside the running
5746 * transaction 5906 * transaction
5747 */ 5907 */
@@ -5755,8 +5915,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5755 btrfs_put_ordered_extent(ordered); 5915 btrfs_put_ordered_extent(ordered);
5756 unlock_extent(&BTRFS_I(inode)->io_tree, 5916 unlock_extent(&BTRFS_I(inode)->io_tree,
5757 alloc_start, locked_end, GFP_NOFS); 5917 alloc_start, locked_end, GFP_NOFS);
5758 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5759
5760 /* 5918 /*
5761 * we can't wait on the range with the transaction 5919 * we can't wait on the range with the transaction
5762 * running or with the extent lock held 5920 * running or with the extent lock held
@@ -5777,10 +5935,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5777 BUG_ON(IS_ERR(em) || !em); 5935 BUG_ON(IS_ERR(em) || !em);
5778 last_byte = min(extent_map_end(em), alloc_end); 5936 last_byte = min(extent_map_end(em), alloc_end);
5779 last_byte = (last_byte + mask) & ~mask; 5937 last_byte = (last_byte + mask) & ~mask;
5780 if (em->block_start == EXTENT_MAP_HOLE) { 5938 if (em->block_start == EXTENT_MAP_HOLE ||
5781 ret = prealloc_file_range(trans, inode, cur_offset, 5939 (cur_offset >= inode->i_size &&
5782 last_byte, locked_end + 1, 5940 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5783 alloc_hint, mode); 5941 ret = prealloc_file_range(inode,
5942 cur_offset, last_byte,
5943 alloc_hint, mode);
5784 if (ret < 0) { 5944 if (ret < 0) {
5785 free_extent_map(em); 5945 free_extent_map(em);
5786 break; 5946 break;
@@ -5799,9 +5959,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5799 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5959 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5800 GFP_NOFS); 5960 GFP_NOFS);
5801 5961
5802 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5962 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5803out_free: 5963 alloc_end - alloc_start);
5804 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5805out: 5964out:
5806 mutex_unlock(&inode->i_mutex); 5965 mutex_unlock(&inode->i_mutex);
5807 return ret; 5966 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b9..645a17927a8f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
237 u64 objectid; 237 u64 objectid;
238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 238 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239 u64 index = 0; 239 u64 index = 0;
240 unsigned long nr = 1;
241 240
242 /* 241 /*
243 * 1 - inode item 242 * 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
290 btrfs_set_root_generation(&root_item, trans->transid); 289 btrfs_set_root_generation(&root_item, trans->transid);
291 btrfs_set_root_level(&root_item, 0); 290 btrfs_set_root_level(&root_item, 0);
292 btrfs_set_root_refs(&root_item, 1); 291 btrfs_set_root_refs(&root_item, 1);
293 btrfs_set_root_used(&root_item, 0); 292 btrfs_set_root_used(&root_item, leaf->len);
294 btrfs_set_root_last_snapshot(&root_item, 0); 293 btrfs_set_root_last_snapshot(&root_item, 0);
295 294
296 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 295 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
342 341
343 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 342 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
344fail: 343fail:
345 nr = trans->blocks_used;
346 err = btrfs_commit_transaction(trans, root); 344 err = btrfs_commit_transaction(trans, root);
347 if (err && !ret) 345 if (err && !ret)
348 ret = err; 346 ret = err;
349 347
350 btrfs_unreserve_metadata_space(root, 6); 348 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
352 return ret; 349 return ret;
353} 350}
354 351
355static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 352static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
356 char *name, int namelen) 353 char *name, int namelen)
357{ 354{
355 struct inode *inode;
358 struct btrfs_pending_snapshot *pending_snapshot; 356 struct btrfs_pending_snapshot *pending_snapshot;
359 struct btrfs_trans_handle *trans; 357 struct btrfs_trans_handle *trans;
360 int ret = 0; 358 int ret;
361 int err;
362 unsigned long nr = 0;
363 359
364 if (!root->ref_cows) 360 if (!root->ref_cows)
365 return -EINVAL; 361 return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
372 */ 368 */
373 ret = btrfs_reserve_metadata_space(root, 6); 369 ret = btrfs_reserve_metadata_space(root, 6);
374 if (ret) 370 if (ret)
375 goto fail_unlock; 371 goto fail;
376 372
377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 373 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
378 if (!pending_snapshot) { 374 if (!pending_snapshot) {
379 ret = -ENOMEM; 375 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6); 376 btrfs_unreserve_metadata_space(root, 6);
381 goto fail_unlock; 377 goto fail;
382 } 378 }
383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 379 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
384 if (!pending_snapshot->name) { 380 if (!pending_snapshot->name) {
385 ret = -ENOMEM; 381 ret = -ENOMEM;
386 kfree(pending_snapshot); 382 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6); 383 btrfs_unreserve_metadata_space(root, 6);
388 goto fail_unlock; 384 goto fail;
389 } 385 }
390 memcpy(pending_snapshot->name, name, namelen); 386 memcpy(pending_snapshot->name, name, namelen);
391 pending_snapshot->name[namelen] = '\0'; 387 pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
395 pending_snapshot->root = root; 391 pending_snapshot->root = root;
396 list_add(&pending_snapshot->list, 392 list_add(&pending_snapshot->list,
397 &trans->transaction->pending_snapshots); 393 &trans->transaction->pending_snapshots);
398 err = btrfs_commit_transaction(trans, root); 394 ret = btrfs_commit_transaction(trans, root);
395 BUG_ON(ret);
396 btrfs_unreserve_metadata_space(root, 6);
399 397
400fail_unlock: 398 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
401 btrfs_btree_balance_dirty(root, nr); 399 if (IS_ERR(inode)) {
400 ret = PTR_ERR(inode);
401 goto fail;
402 }
403 BUG_ON(!inode);
404 d_instantiate(dentry, inode);
405 ret = 0;
406fail:
402 return ret; 407 return ret;
403} 408}
404 409
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1027 BUG_ON(!trans); 1032 BUG_ON(!trans);
1028 1033
1029 /* punch hole in destination first */ 1034 /* punch hole in destination first */
1030 btrfs_drop_extents(trans, root, inode, off, off + len, 1035 btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1031 off + len, 0, &hint_byte, 1);
1032 1036
1033 /* clone data */ 1037 /* clone data */
1034 key.objectid = src->i_ino; 1038 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..5c2a9e78a949 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 291
292/* 292/*
293 * remove an ordered extent from the tree. No references are dropped 293 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 294 * and you must wake_up entry->wait. You must hold the tree mutex
295 * while you call this function.
295 */ 296 */
296int btrfs_remove_ordered_extent(struct inode *inode, 297static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 298 struct btrfs_ordered_extent *entry)
298{ 299{
299 struct btrfs_ordered_inode_tree *tree; 300 struct btrfs_ordered_inode_tree *tree;
300 struct rb_node *node; 301 struct rb_node *node;
301 302
302 tree = &BTRFS_I(inode)->ordered_tree; 303 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 304 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 305 rb_erase(node, &tree->tree);
306 tree->last = NULL; 306 tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
326 } 326 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
328 328
329 return 0;
330}
331
332/*
333 * remove an ordered extent from the tree. No references are dropped
334 * but any waiters are woken.
335 */
336int btrfs_remove_ordered_extent(struct inode *inode,
337 struct btrfs_ordered_extent *entry)
338{
339 struct btrfs_ordered_inode_tree *tree;
340 int ret;
341
342 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex);
344 ret = __btrfs_remove_ordered_extent(inode, entry);
329 mutex_unlock(&tree->mutex); 345 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait); 346 wake_up(&entry->wait);
331 return 0; 347
348 return ret;
332} 349}
333 350
334/* 351/*
335 * wait for all the ordered extents in a root. This is done when balancing 352 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 353 * space between drives.
337 */ 354 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 355int btrfs_wait_ordered_extents(struct btrfs_root *root,
356 int nocow_only, int delay_iput)
339{ 357{
340 struct list_head splice; 358 struct list_head splice;
341 struct list_head *cur; 359 struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 390 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 391 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 392 btrfs_put_ordered_extent(ordered);
375 iput(inode); 393 if (delay_iput)
394 btrfs_add_delayed_iput(inode);
395 else
396 iput(inode);
376 } else { 397 } else {
377 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
378 } 399 }
@@ -430,7 +451,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 451 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 452 else
432 filemap_flush(inode->i_mapping); 453 filemap_flush(inode->i_mapping);
433 iput(inode); 454 btrfs_add_delayed_iput(inode);
434 } 455 }
435 456
436 cond_resched(); 457 cond_resched();
@@ -589,7 +610,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 610 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 611 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 612 */
592int btrfs_ordered_update_i_size(struct inode *inode, 613int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 614 struct btrfs_ordered_extent *ordered)
594{ 615{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 616 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 618 u64 disk_i_size;
598 u64 new_i_size; 619 u64 new_i_size;
599 u64 i_size_test; 620 u64 i_size_test;
621 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 622 struct rb_node *node;
623 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 624 struct btrfs_ordered_extent *test;
625 int ret = 1;
626
627 if (ordered)
628 offset = entry_end(ordered);
629 else
630 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
602 631
603 mutex_lock(&tree->mutex); 632 mutex_lock(&tree->mutex);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 633 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 634
635 /* truncate file */
636 if (disk_i_size > i_size) {
637 BTRFS_I(inode)->disk_i_size = i_size;
638 ret = 0;
639 goto out;
640 }
641
606 /* 642 /*
607 * if the disk i_size is already at the inode->i_size, or 643 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 644 * this ordered extent is inside the disk i_size, we're done
609 */ 645 */
610 if (disk_i_size >= inode->i_size || 646 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 647 goto out;
613 } 648 }
614 649
@@ -616,8 +651,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 651 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 652 * between disk_i_size and this ordered extent
618 */ 653 */
619 if (test_range_bit(io_tree, disk_i_size, 654 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 655 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 656 goto out;
623 } 657 }
@@ -626,20 +660,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 660 * if we find an ordered extent then we can't update disk i_size
627 * yet 661 * yet
628 */ 662 */
629 node = &ordered->rb_node; 663 if (ordered) {
630 while (1) { 664 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 665 } else {
632 if (!node) 666 prev = tree_search(tree, offset);
633 break; 667 /*
668 * we insert file extents without involving ordered struct,
669 * so there should be no ordered struct cover this offset
670 */
671 if (prev) {
672 test = rb_entry(prev, struct btrfs_ordered_extent,
673 rb_node);
674 BUG_ON(offset_in_entry(test, offset));
675 }
676 node = prev;
677 }
678 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 679 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 680 if (test->file_offset + test->len <= disk_i_size)
636 break; 681 break;
637 if (test->file_offset >= inode->i_size) 682 if (test->file_offset >= i_size)
638 break; 683 break;
639 if (test->file_offset >= disk_i_size) 684 if (test->file_offset >= disk_i_size)
640 goto out; 685 goto out;
686 node = rb_prev(node);
641 } 687 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 688 new_i_size = min_t(u64, offset, i_size);
643 689
644 /* 690 /*
645 * at this point, we know we can safely update i_size to at least 691 * at this point, we know we can safely update i_size to at least
@@ -647,7 +693,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 693 * walk forward and see if ios from higher up in the file have
648 * finished. 694 * finished.
649 */ 695 */
650 node = rb_next(&ordered->rb_node); 696 if (ordered) {
697 node = rb_next(&ordered->rb_node);
698 } else {
699 if (prev)
700 node = rb_next(prev);
701 else
702 node = rb_first(&tree->tree);
703 }
651 i_size_test = 0; 704 i_size_test = 0;
652 if (node) { 705 if (node) {
653 /* 706 /*
@@ -655,10 +708,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 708 * between our ordered extent and the next one.
656 */ 709 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 710 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 711 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 712 i_size_test = test->file_offset;
660 } else { 713 } else {
661 i_size_test = i_size_read(inode); 714 i_size_test = i_size;
662 } 715 }
663 716
664 /* 717 /*
@@ -667,15 +720,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 720 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 721 * disk_i_size to the end of the region.
669 */ 722 */
670 if (i_size_test > entry_end(ordered) && 723 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 724 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 725 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 726 new_i_size = min_t(u64, i_size_test, i_size);
674 } 727 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 728 BTRFS_I(inode)->disk_i_size = new_i_size;
729 ret = 0;
676out: 730out:
731 /*
732 * we need to remove the ordered extent with the tree lock held
733 * so that other people calling this function don't find our fully
734 * processed ordered entry and skip updating the i_size
735 */
736 if (ordered)
737 __btrfs_remove_ordered_extent(inode, ordered);
677 mutex_unlock(&tree->mutex); 738 mutex_unlock(&tree->mutex);
678 return 0; 739 if (ordered)
740 wake_up(&ordered->wait);
741 return ret;
679} 742}
680 743
681/* 744/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca8..1fe1282ef47c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 150int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
151struct btrfs_ordered_extent * 151struct btrfs_ordered_extent *
152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 152btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, 153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 154 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 156int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 157int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 158 struct btrfs_root *root,
160 struct inode *inode); 159 struct inode *inode);
160int btrfs_wait_ordered_extents(struct btrfs_root *root,
161 int nocow_only, int delay_iput);
161#endif 162#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..ed3e4a2ec2c8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1561 return 0; 1561 return 0;
1562} 1562}
1563 1563
1564static void put_inodes(struct list_head *list)
1565{
1566 struct inodevec *ivec;
1567 while (!list_empty(list)) {
1568 ivec = list_entry(list->next, struct inodevec, list);
1569 list_del(&ivec->list);
1570 while (ivec->nr > 0) {
1571 ivec->nr--;
1572 iput(ivec->inode[ivec->nr]);
1573 }
1574 kfree(ivec);
1575 }
1576}
1577
1564static int find_next_key(struct btrfs_path *path, int level, 1578static int find_next_key(struct btrfs_path *path, int level,
1565 struct btrfs_key *key) 1579 struct btrfs_key *key)
1566 1580
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1723 1737
1724 btrfs_btree_balance_dirty(root, nr); 1738 btrfs_btree_balance_dirty(root, nr);
1725 1739
1740 /*
1741 * put inodes outside transaction, otherwise we may deadlock.
1742 */
1743 put_inodes(&inode_list);
1744
1726 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1745 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1727 invalidate_extent_cache(root, &key, &next_key); 1746 invalidate_extent_cache(root, &key, &next_key);
1728 } 1747 }
@@ -1752,19 +1771,7 @@ out:
1752 1771
1753 btrfs_btree_balance_dirty(root, nr); 1772 btrfs_btree_balance_dirty(root, nr);
1754 1773
1755 /* 1774 put_inodes(&inode_list);
1756 * put inodes while we aren't holding the tree locks
1757 */
1758 while (!list_empty(&inode_list)) {
1759 struct inodevec *ivec;
1760 ivec = list_entry(inode_list.next, struct inodevec, list);
1761 list_del(&ivec->list);
1762 while (ivec->nr > 0) {
1763 ivec->nr--;
1764 iput(ivec->inode[ivec->nr]);
1765 }
1766 kfree(ivec);
1767 }
1768 1775
1769 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1776 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1770 invalidate_extent_cache(root, &key, &next_key); 1777 invalidate_extent_cache(root, &key, &next_key);
@@ -3274,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3274 return -ENOMEM; 3281 return -ENOMEM;
3275 3282
3276 path = btrfs_alloc_path(); 3283 path = btrfs_alloc_path();
3277 if (!path) 3284 if (!path) {
3285 kfree(cluster);
3278 return -ENOMEM; 3286 return -ENOMEM;
3287 }
3279 3288
3280 rc->extents_found = 0; 3289 rc->extents_found = 0;
3281 rc->extents_skipped = 0; 3290 rc->extents_skipped = 0;
@@ -3534,8 +3543,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3534 (unsigned long long)rc->block_group->key.objectid, 3543 (unsigned long long)rc->block_group->key.objectid,
3535 (unsigned long long)rc->block_group->flags); 3544 (unsigned long long)rc->block_group->flags);
3536 3545
3537 btrfs_start_delalloc_inodes(fs_info->tree_root); 3546 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3538 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3547 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3539 3548
3540 while (1) { 3549 while (1) {
3541 rc->extents_found = 0; 3550 rc->extents_found = 0;
@@ -3755,6 +3764,7 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3764 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3765 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3766 err = PTR_ERR(fs_root);
3767 btrfs_orphan_cleanup(fs_root);
3758 } 3768 }
3759 return err; 3769 return err;
3760} 3770}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf53..3f9b45704fcd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -128,6 +128,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
128 substring_t args[MAX_OPT_ARGS]; 128 substring_t args[MAX_OPT_ARGS];
129 char *p, *num; 129 char *p, *num;
130 int intarg; 130 int intarg;
131 int ret = 0;
131 132
132 if (!options) 133 if (!options)
133 return 0; 134 return 0;
@@ -262,12 +263,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
262 case Opt_discard: 263 case Opt_discard:
263 btrfs_set_opt(info->mount_opt, DISCARD); 264 btrfs_set_opt(info->mount_opt, DISCARD);
264 break; 265 break;
266 case Opt_err:
267 printk(KERN_INFO "btrfs: unrecognized mount option "
268 "'%s'\n", p);
269 ret = -EINVAL;
270 goto out;
265 default: 271 default:
266 break; 272 break;
267 } 273 }
268 } 274 }
275out:
269 kfree(options); 276 kfree(options);
270 return 0; 277 return ret;
271} 278}
272 279
273/* 280/*
@@ -405,8 +412,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
405 return 0; 412 return 0;
406 } 413 }
407 414
408 btrfs_start_delalloc_inodes(root); 415 btrfs_start_delalloc_inodes(root, 0);
409 btrfs_wait_ordered_extents(root, 0); 416 btrfs_wait_ordered_extents(root, 0, 0);
410 417
411 trans = btrfs_start_transaction(root, 1); 418 trans = btrfs_start_transaction(root, 1);
412 ret = btrfs_commit_transaction(trans, root); 419 ret = btrfs_commit_transaction(trans, root);
@@ -450,6 +457,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
450 seq_puts(seq, ",notreelog"); 457 seq_puts(seq, ",notreelog");
451 if (btrfs_test_opt(root, FLUSHONCOMMIT)) 458 if (btrfs_test_opt(root, FLUSHONCOMMIT))
452 seq_puts(seq, ",flushoncommit"); 459 seq_puts(seq, ",flushoncommit");
460 if (btrfs_test_opt(root, DISCARD))
461 seq_puts(seq, ",discard");
453 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 462 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
454 seq_puts(seq, ",noacl"); 463 seq_puts(seq, ",noacl");
455 return 0; 464 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c207e8c32c9b..b2acc79f1b34 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
333 memset(trans, 0, sizeof(*trans)); 333 memset(trans, 0, sizeof(*trans));
334 kmem_cache_free(btrfs_trans_handle_cachep, trans); 334 kmem_cache_free(btrfs_trans_handle_cachep, trans);
335 335
336 if (throttle)
337 btrfs_run_delayed_iputs(root);
338
336 return 0; 339 return 0;
337} 340}
338 341
@@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
354 * those extents are sent to disk but does not wait on them 357 * those extents are sent to disk but does not wait on them
355 */ 358 */
356int btrfs_write_marked_extents(struct btrfs_root *root, 359int btrfs_write_marked_extents(struct btrfs_root *root,
357 struct extent_io_tree *dirty_pages) 360 struct extent_io_tree *dirty_pages, int mark)
358{ 361{
359 int ret; 362 int ret;
360 int err = 0; 363 int err = 0;
@@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
367 370
368 while (1) { 371 while (1) {
369 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 372 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
370 EXTENT_DIRTY); 373 mark);
371 if (ret) 374 if (ret)
372 break; 375 break;
373 while (start <= end) { 376 while (start <= end) {
@@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
413 * on all the pages and clear them from the dirty pages state tree 416 * on all the pages and clear them from the dirty pages state tree
414 */ 417 */
415int btrfs_wait_marked_extents(struct btrfs_root *root, 418int btrfs_wait_marked_extents(struct btrfs_root *root,
416 struct extent_io_tree *dirty_pages) 419 struct extent_io_tree *dirty_pages, int mark)
417{ 420{
418 int ret; 421 int ret;
419 int err = 0; 422 int err = 0;
@@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
425 unsigned long index; 428 unsigned long index;
426 429
427 while (1) { 430 while (1) {
428 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 431 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
429 EXTENT_DIRTY); 432 mark);
430 if (ret) 433 if (ret)
431 break; 434 break;
432 435
433 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 436 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
434 while (start <= end) { 437 while (start <= end) {
435 index = start >> PAGE_CACHE_SHIFT; 438 index = start >> PAGE_CACHE_SHIFT;
436 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 439 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
460 * those extents are on disk for transaction or log commit 463 * those extents are on disk for transaction or log commit
461 */ 464 */
462int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 465int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
463 struct extent_io_tree *dirty_pages) 466 struct extent_io_tree *dirty_pages, int mark)
464{ 467{
465 int ret; 468 int ret;
466 int ret2; 469 int ret2;
467 470
468 ret = btrfs_write_marked_extents(root, dirty_pages); 471 ret = btrfs_write_marked_extents(root, dirty_pages, mark);
469 ret2 = btrfs_wait_marked_extents(root, dirty_pages); 472 ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
470 return ret || ret2; 473 return ret || ret2;
471} 474}
472 475
@@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
479 return filemap_write_and_wait(btree_inode->i_mapping); 482 return filemap_write_and_wait(btree_inode->i_mapping);
480 } 483 }
481 return btrfs_write_and_wait_marked_extents(root, 484 return btrfs_write_and_wait_marked_extents(root,
482 &trans->transaction->dirty_pages); 485 &trans->transaction->dirty_pages,
486 EXTENT_DIRTY);
483} 487}
484 488
485/* 489/*
@@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
497{ 501{
498 int ret; 502 int ret;
499 u64 old_root_bytenr; 503 u64 old_root_bytenr;
504 u64 old_root_used;
500 struct btrfs_root *tree_root = root->fs_info->tree_root; 505 struct btrfs_root *tree_root = root->fs_info->tree_root;
501 506
507 old_root_used = btrfs_root_used(&root->root_item);
502 btrfs_write_dirty_block_groups(trans, root); 508 btrfs_write_dirty_block_groups(trans, root);
503 509
504 while (1) { 510 while (1) {
505 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 511 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
506 if (old_root_bytenr == root->node->start) 512 if (old_root_bytenr == root->node->start &&
513 old_root_used == btrfs_root_used(&root->root_item))
507 break; 514 break;
508 515
509 btrfs_set_root_node(&root->root_item, root->node); 516 btrfs_set_root_node(&root->root_item, root->node);
@@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
512 &root->root_item); 519 &root->root_item);
513 BUG_ON(ret); 520 BUG_ON(ret);
514 521
522 old_root_used = btrfs_root_used(&root->root_item);
515 ret = btrfs_write_dirty_block_groups(trans, root); 523 ret = btrfs_write_dirty_block_groups(trans, root);
516 BUG_ON(ret); 524 BUG_ON(ret);
517 } 525 }
@@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
795 memcpy(&pending->root_key, &key, sizeof(key)); 803 memcpy(&pending->root_key, &key, sizeof(key));
796fail: 804fail:
797 kfree(new_root_item); 805 kfree(new_root_item);
798 btrfs_unreserve_metadata_space(root, 6);
799 return ret; 806 return ret;
800} 807}
801 808
@@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
807 u64 index = 0; 814 u64 index = 0;
808 struct btrfs_trans_handle *trans; 815 struct btrfs_trans_handle *trans;
809 struct inode *parent_inode; 816 struct inode *parent_inode;
810 struct inode *inode;
811 struct btrfs_root *parent_root; 817 struct btrfs_root *parent_root;
812 818
813 parent_inode = pending->dentry->d_parent->d_inode; 819 parent_inode = pending->dentry->d_parent->d_inode;
@@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
839 845
840 BUG_ON(ret); 846 BUG_ON(ret);
841 847
842 inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
843 d_instantiate(pending->dentry, inode);
844fail: 848fail:
845 btrfs_end_transaction(trans, fs_info->fs_root); 849 btrfs_end_transaction(trans, fs_info->fs_root);
846 return ret; 850 return ret;
@@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
994 mutex_unlock(&root->fs_info->trans_mutex); 998 mutex_unlock(&root->fs_info->trans_mutex);
995 999
996 if (flush_on_commit) { 1000 if (flush_on_commit) {
997 btrfs_start_delalloc_inodes(root); 1001 btrfs_start_delalloc_inodes(root, 1);
998 ret = btrfs_wait_ordered_extents(root, 0); 1002 ret = btrfs_wait_ordered_extents(root, 0, 1);
999 BUG_ON(ret); 1003 BUG_ON(ret);
1000 } else if (snap_pending) { 1004 } else if (snap_pending) {
1001 ret = btrfs_wait_ordered_extents(root, 1); 1005 ret = btrfs_wait_ordered_extents(root, 0, 1);
1002 BUG_ON(ret); 1006 BUG_ON(ret);
1003 } 1007 }
1004 1008
@@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1116 current->journal_info = NULL; 1120 current->journal_info = NULL;
1117 1121
1118 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1122 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1123
1124 if (current != root->fs_info->transaction_kthread)
1125 btrfs_run_delayed_iputs(root);
1126
1119 return ret; 1127 return ret;
1120} 1128}
1121 1129
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938c..93c7ccb33118 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 107int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 108 struct btrfs_root *root);
109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 109int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
110 struct extent_io_tree *dirty_pages); 110 struct extent_io_tree *dirty_pages, int mark);
111int btrfs_write_marked_extents(struct btrfs_root *root, 111int btrfs_write_marked_extents(struct btrfs_root *root,
112 struct extent_io_tree *dirty_pages); 112 struct extent_io_tree *dirty_pages, int mark);
113int btrfs_wait_marked_extents(struct btrfs_root *root, 113int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages); 114 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 115int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
116#endif 116#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676a..4a9434b622ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
542 542
543 saved_nbytes = inode_get_bytes(inode); 543 saved_nbytes = inode_get_bytes(inode);
544 /* drop any overlapping extents */ 544 /* drop any overlapping extents */
545 ret = btrfs_drop_extents(trans, root, inode, 545 ret = btrfs_drop_extents(trans, inode, start, extent_end,
546 start, extent_end, extent_end, start, &alloc_hint, 1); 546 &alloc_hint, 1);
547 BUG_ON(ret); 547 BUG_ON(ret);
548 548
549 if (found_type == BTRFS_FILE_EXTENT_REG || 549 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
930 return 0; 930 return 0;
931} 931}
932 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
933/* 944/*
934 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1008 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1010
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1018 BUG_ON(ret);
1004 } 1019 }
1005 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1606 u32 mode;
1593 1607
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1617 eb, i, &key);
1604 BUG_ON(ret); 1618 BUG_ON(ret);
1605 1619
1606 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1621 * orhpan item exist. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1608 */ 1623 */
1609 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1626 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1627 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1628 }
1629
1631 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1631 path, key.objectid);
1633 BUG_ON(ret); 1632 BUG_ON(ret);
@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1977{ 1976{
1978 int index1; 1977 int index1;
1979 int index2; 1978 int index2;
1979 int mark;
1980 int ret; 1980 int ret;
1981 struct btrfs_root *log = root->log_root; 1981 struct btrfs_root *log = root->log_root;
1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; 1982 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1983 u64 log_transid = 0; 1983 unsigned long log_transid = 0;
1984 1984
1985 mutex_lock(&root->log_mutex); 1985 mutex_lock(&root->log_mutex);
1986 index1 = root->log_transid % 2; 1986 index1 = root->log_transid % 2;
@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2014 goto out; 2014 goto out;
2015 } 2015 }
2016 2016
2017 log_transid = root->log_transid;
2018 if (log_transid % 2 == 0)
2019 mark = EXTENT_DIRTY;
2020 else
2021 mark = EXTENT_NEW;
2022
2017 /* we start IO on all the marked extents here, but we don't actually 2023 /* we start IO on all the marked extents here, but we don't actually
2018 * wait for them until later. 2024 * wait for them until later.
2019 */ 2025 */
2020 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); 2026 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2021 BUG_ON(ret); 2027 BUG_ON(ret);
2022 2028
2023 btrfs_set_root_node(&log->root_item, log->node); 2029 btrfs_set_root_node(&log->root_item, log->node);
2024 2030
2025 root->log_batch = 0; 2031 root->log_batch = 0;
2026 log_transid = root->log_transid;
2027 root->log_transid++; 2032 root->log_transid++;
2028 log->log_transid = root->log_transid; 2033 log->log_transid = root->log_transid;
2029 root->log_start_pid = 0; 2034 root->log_start_pid = 0;
2030 smp_mb(); 2035 smp_mb();
2031 /* 2036 /*
2032 * log tree has been flushed to disk, new modifications of 2037 * IO has been started, blocks of the log tree have WRITTEN flag set
2033 * the log will be written to new positions. so it's safe to 2038 * in their headers. new modifications of the log will be written to
2034 * allow log writers to go in. 2039 * new positions. so it's safe to allow log writers to go in.
2035 */ 2040 */
2036 mutex_unlock(&root->log_mutex); 2041 mutex_unlock(&root->log_mutex);
2037 2042
@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2052 2057
2053 index2 = log_root_tree->log_transid % 2; 2058 index2 = log_root_tree->log_transid % 2;
2054 if (atomic_read(&log_root_tree->log_commit[index2])) { 2059 if (atomic_read(&log_root_tree->log_commit[index2])) {
2055 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2060 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2056 wait_log_commit(trans, log_root_tree, 2061 wait_log_commit(trans, log_root_tree,
2057 log_root_tree->log_transid); 2062 log_root_tree->log_transid);
2058 mutex_unlock(&log_root_tree->log_mutex); 2063 mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2072 * check the full commit flag again 2077 * check the full commit flag again
2073 */ 2078 */
2074 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2079 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2075 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2080 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2076 mutex_unlock(&log_root_tree->log_mutex); 2081 mutex_unlock(&log_root_tree->log_mutex);
2077 ret = -EAGAIN; 2082 ret = -EAGAIN;
2078 goto out_wake_log_root; 2083 goto out_wake_log_root;
2079 } 2084 }
2080 2085
2081 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2086 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2082 &log_root_tree->dirty_log_pages); 2087 &log_root_tree->dirty_log_pages,
2088 EXTENT_DIRTY | EXTENT_NEW);
2083 BUG_ON(ret); 2089 BUG_ON(ret);
2084 btrfs_wait_marked_extents(log, &log->dirty_log_pages); 2090 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2085 2091
2086 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 2092 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
2087 log_root_tree->node->start); 2093 log_root_tree->node->start);
@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2147 2153
2148 while (1) { 2154 while (1) {
2149 ret = find_first_extent_bit(&log->dirty_log_pages, 2155 ret = find_first_extent_bit(&log->dirty_log_pages,
2150 0, &start, &end, EXTENT_DIRTY); 2156 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
2151 if (ret) 2157 if (ret)
2152 break; 2158 break;
2153 2159
2154 clear_extent_dirty(&log->dirty_log_pages, 2160 clear_extent_bits(&log->dirty_log_pages, start, end,
2155 start, end, GFP_NOFS); 2161 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2156 } 2162 }
2157 2163
2158 if (log->log_transid > 0) { 2164 if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5a..220dad5db017 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2209 max_chunk_size = 10 * calc_size; 2209 max_chunk_size = 10 * calc_size;
2210 min_stripe_size = 64 * 1024 * 1024; 2210 min_stripe_size = 64 * 1024 * 1024;
2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 2211 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2212 max_chunk_size = 4 * calc_size; 2212 max_chunk_size = 256 * 1024 * 1024;
2213 min_stripe_size = 32 * 1024 * 1024; 2213 min_stripe_size = 32 * 1024 * 1024;
2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2214 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2215 calc_size = 8 * 1024 * 1024; 2215 calc_size = 8 * 1024 * 1024;
@@ -2649,8 +2649,10 @@ again:
2649 em = lookup_extent_mapping(em_tree, logical, *length); 2649 em = lookup_extent_mapping(em_tree, logical, *length);
2650 read_unlock(&em_tree->lock); 2650 read_unlock(&em_tree->lock);
2651 2651
2652 if (!em && unplug_page) 2652 if (!em && unplug_page) {
2653 kfree(multi);
2653 return 0; 2654 return 0;
2655 }
2654 2656
2655 if (!em) { 2657 if (!em) {
2656 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2658 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48a..193b58f7d3f3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
85 return ret; 85 return ret;
86} 86}
87 87
88int __btrfs_setxattr(struct inode *inode, const char *name, 88static int do_setxattr(struct btrfs_trans_handle *trans,
89 const void *value, size_t size, int flags) 89 struct inode *inode, const char *name,
90 const void *value, size_t size, int flags)
90{ 91{
91 struct btrfs_dir_item *di; 92 struct btrfs_dir_item *di;
92 struct btrfs_root *root = BTRFS_I(inode)->root; 93 struct btrfs_root *root = BTRFS_I(inode)->root;
93 struct btrfs_trans_handle *trans;
94 struct btrfs_path *path; 94 struct btrfs_path *path;
95 int ret = 0, mod = 0; 95 size_t name_len = strlen(name);
96 int ret = 0;
97
98 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
99 return -ENOSPC;
96 100
97 path = btrfs_alloc_path(); 101 path = btrfs_alloc_path();
98 if (!path) 102 if (!path)
99 return -ENOMEM; 103 return -ENOMEM;
100 104
101 trans = btrfs_join_transaction(root, 1);
102 btrfs_set_trans_block_group(trans, inode);
103
104 /* first lets see if we already have this xattr */ 105 /* first lets see if we already have this xattr */
105 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, 106 di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
106 strlen(name), -1); 107 strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
118 } 119 }
119 120
120 ret = btrfs_delete_one_dir_name(trans, root, path, di); 121 ret = btrfs_delete_one_dir_name(trans, root, path, di);
121 if (ret) 122 BUG_ON(ret);
122 goto out;
123 btrfs_release_path(root, path); 123 btrfs_release_path(root, path);
124 124
125 /* if we don't have a value then we are removing the xattr */ 125 /* if we don't have a value then we are removing the xattr */
126 if (!value) { 126 if (!value)
127 mod = 1;
128 goto out; 127 goto out;
129 }
130 } else { 128 } else {
131 btrfs_release_path(root, path); 129 btrfs_release_path(root, path);
132 130
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
138 } 136 }
139 137
140 /* ok we have to create a completely new xattr */ 138 /* ok we have to create a completely new xattr */
141 ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), 139 ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
142 value, size, inode->i_ino); 140 name, name_len, value, size);
141 BUG_ON(ret);
142out:
143 btrfs_free_path(path);
144 return ret;
145}
146
147int __btrfs_setxattr(struct btrfs_trans_handle *trans,
148 struct inode *inode, const char *name,
149 const void *value, size_t size, int flags)
150{
151 struct btrfs_root *root = BTRFS_I(inode)->root;
152 int ret;
153
154 if (trans)
155 return do_setxattr(trans, inode, name, value, size, flags);
156
157 ret = btrfs_reserve_metadata_space(root, 2);
143 if (ret) 158 if (ret)
144 goto out; 159 return ret;
145 mod = 1;
146 160
147out: 161 trans = btrfs_start_transaction(root, 1);
148 if (mod) { 162 if (!trans) {
149 inode->i_ctime = CURRENT_TIME; 163 ret = -ENOMEM;
150 ret = btrfs_update_inode(trans, root, inode); 164 goto out;
151 } 165 }
166 btrfs_set_trans_block_group(trans, inode);
152 167
153 btrfs_end_transaction(trans, root); 168 ret = do_setxattr(trans, inode, name, value, size, flags);
154 btrfs_free_path(path); 169 if (ret)
170 goto out;
171
172 inode->i_ctime = CURRENT_TIME;
173 ret = btrfs_update_inode(trans, root, inode);
174 BUG_ON(ret);
175out:
176 btrfs_end_transaction_throttle(trans, root);
177 btrfs_unreserve_metadata_space(root, 2);
155 return ret; 178 return ret;
156} 179}
157 180
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
314 337
315 if (size == 0) 338 if (size == 0)
316 value = ""; /* empty EA, do not remove */ 339 value = ""; /* empty EA, do not remove */
317 return __btrfs_setxattr(dentry->d_inode, name, value, size, flags); 340
341 return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
342 flags);
318} 343}
319 344
320int btrfs_removexattr(struct dentry *dentry, const char *name) 345int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
329 354
330 if (!btrfs_is_valid_xattr(name)) 355 if (!btrfs_is_valid_xattr(name))
331 return -EOPNOTSUPP; 356 return -EOPNOTSUPP;
332 return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); 357
358 return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
359 XATTR_REPLACE);
333} 360}
334 361
335int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) 362int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
363 struct inode *inode, struct inode *dir)
336{ 364{
337 int err; 365 int err;
338 size_t len; 366 size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
354 } else { 382 } else {
355 strcpy(name, XATTR_SECURITY_PREFIX); 383 strcpy(name, XATTR_SECURITY_PREFIX);
356 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); 384 strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
357 err = __btrfs_setxattr(inode, name, value, len, 0); 385 err = __btrfs_setxattr(trans, inode, name, value, len, 0);
358 kfree(name); 386 kfree(name);
359 } 387 }
360 388
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f7..721efa0346e0 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
27 27
28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 28extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
29 void *buffer, size_t size); 29 void *buffer, size_t size);
30extern int __btrfs_setxattr(struct inode *inode, const char *name, 30extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
31 const void *value, size_t size, int flags); 31 struct inode *inode, const char *name,
32 32 const void *value, size_t size, int flags);
33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 33extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
34 void *buffer, size_t size); 34 void *buffer, size_t size);
35extern int btrfs_setxattr(struct dentry *dentry, const char *name, 35extern int btrfs_setxattr(struct dentry *dentry, const char *name,
36 const void *value, size_t size, int flags); 36 const void *value, size_t size, int flags);
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir);
40 41
41#endif /* __XATTR__ */ 42#endif /* __XATTR__ */
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 3797e0077b35..2906077ac798 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -84,7 +84,7 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) 84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
85{ 85{
86 struct cachefiles_object *fsdef; 86 struct cachefiles_object *fsdef;
87 struct nameidata nd; 87 struct path path;
88 struct kstatfs stats; 88 struct kstatfs stats;
89 struct dentry *graveyard, *cachedir, *root; 89 struct dentry *graveyard, *cachedir, *root;
90 const struct cred *saved_cred; 90 const struct cred *saved_cred;
@@ -114,15 +114,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
114 _debug("- fsdef %p", fsdef); 114 _debug("- fsdef %p", fsdef);
115 115
116 /* look up the directory at the root of the cache */ 116 /* look up the directory at the root of the cache */
117 memset(&nd, 0, sizeof(nd)); 117 ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
118
119 ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
120 if (ret < 0) 118 if (ret < 0)
121 goto error_open_root; 119 goto error_open_root;
122 120
123 cache->mnt = mntget(nd.path.mnt); 121 cache->mnt = path.mnt;
124 root = dget(nd.path.dentry); 122 root = path.dentry;
125 path_put(&nd.path);
126 123
127 /* check parameters */ 124 /* check parameters */
128 ret = -EOPNOTSUPP; 125 ret = -EOPNOTSUPP;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 4618516dd994..c2413561ea75 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -21,6 +21,7 @@
21#include <linux/mount.h> 21#include <linux/mount.h>
22#include <linux/statfs.h> 22#include <linux/statfs.h>
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/string.h>
24#include <linux/fs_struct.h> 25#include <linux/fs_struct.h>
25#include "internal.h" 26#include "internal.h"
26 27
@@ -257,8 +258,7 @@ static ssize_t cachefiles_daemon_write(struct file *file,
257 if (args == data) 258 if (args == data)
258 goto error; 259 goto error;
259 *args = '\0'; 260 *args = '\0';
260 for (args++; isspace(*args); args++) 261 args = skip_spaces(++args);
261 continue;
262 } 262 }
263 263
264 /* run the appropriate command handler */ 264 /* run the appropriate command handler */
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index a6c8c6fe8df9..1d8332563863 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/mount.h> 12#include <linux/mount.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/ima.h>
15#include "internal.h" 14#include "internal.h"
16 15
17/* 16/*
@@ -923,7 +922,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
923 if (IS_ERR(file)) { 922 if (IS_ERR(file)) {
924 ret = PTR_ERR(file); 923 ret = PTR_ERR(file);
925 } else { 924 } else {
926 ima_counts_get(file);
927 ret = -EIO; 925 ret = -EIO;
928 if (file->f_op->write) { 926 if (file->f_op->write) {
929 pos = (loff_t) page->index << PAGE_SHIFT; 927 pos = (loff_t) page->index << PAGE_SHIFT;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 094ea65afc85..7b2600b380d7 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem
5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session. 5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
6Disable use of server inode numbers when server only 6Disable use of server inode numbers when server only
7partially supports them (e.g. for one server querying inode numbers on 7partially supports them (e.g. for one server querying inode numbers on
8FindFirst fails but QPathInfo queries works). 8FindFirst fails but QPathInfo queries works). Fix oops with dfs in
9cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
10for OpenOffice when on forcedirectio mount e.g.)
9 11
10Version 1.60 12Version 1.60
11------------- 13-------------
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4ba..b44ce0a0711c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
269 int err; 269 int err;
270 270
271 mntget(newmnt); 271 mntget(newmnt);
272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); 272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
273 switch (err) { 273 switch (err) {
274 case 0: 274 case 0:
275 path_put(&nd->path); 275 path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
371 if (IS_ERR(mnt)) 371 if (IS_ERR(mnt))
372 goto out_err; 372 goto out_err;
373 373
374 nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
375 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); 374 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
376 375
377out: 376out:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 29f1da761bbf..8c6a03627176 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = {
758}; 758};
759 759
760const struct file_operations cifs_file_direct_ops = { 760const struct file_operations cifs_file_direct_ops = {
761 /* no mmap, no aio, no readv - 761 /* no aio, no readv -
762 BB reevaluate whether they can be done with directio, no cache */ 762 BB reevaluate whether they can be done with directio, no cache */
763 .read = cifs_user_read, 763 .read = cifs_user_read,
764 .write = cifs_user_write, 764 .write = cifs_user_write,
@@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = {
767 .lock = cifs_lock, 767 .lock = cifs_lock,
768 .fsync = cifs_fsync, 768 .fsync = cifs_fsync,
769 .flush = cifs_flush, 769 .flush = cifs_flush,
770 .mmap = cifs_file_mmap,
770 .splice_read = generic_file_splice_read, 771 .splice_read = generic_file_splice_read,
771#ifdef CONFIG_CIFS_POSIX 772#ifdef CONFIG_CIFS_POSIX
772 .unlocked_ioctl = cifs_ioctl, 773 .unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63ea83ff687f..3bbcaa716b3c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2287,12 +2287,12 @@ int
2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2288 char *mount_data_global, const char *devname) 2288 char *mount_data_global, const char *devname)
2289{ 2289{
2290 int rc = 0; 2290 int rc;
2291 int xid; 2291 int xid;
2292 struct smb_vol *volume_info; 2292 struct smb_vol *volume_info;
2293 struct cifsSesInfo *pSesInfo = NULL; 2293 struct cifsSesInfo *pSesInfo;
2294 struct cifsTconInfo *tcon = NULL; 2294 struct cifsTconInfo *tcon;
2295 struct TCP_Server_Info *srvTcp = NULL; 2295 struct TCP_Server_Info *srvTcp;
2296 char *full_path; 2296 char *full_path;
2297 char *mount_data = mount_data_global; 2297 char *mount_data = mount_data_global;
2298#ifdef CONFIG_CIFS_DFS_UPCALL 2298#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -2301,6 +2301,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2301 int referral_walks_count = 0; 2301 int referral_walks_count = 0;
2302try_mount_again: 2302try_mount_again:
2303#endif 2303#endif
2304 rc = 0;
2305 tcon = NULL;
2306 pSesInfo = NULL;
2307 srvTcp = NULL;
2304 full_path = NULL; 2308 full_path = NULL;
2305 2309
2306 xid = GetXid(); 2310 xid = GetXid();
@@ -2597,6 +2601,7 @@ remote_path_check:
2597 2601
2598 cleanup_volume_info(&volume_info); 2602 cleanup_volume_info(&volume_info);
2599 referral_walks_count++; 2603 referral_walks_count++;
2604 FreeXid(xid);
2600 goto try_mount_again; 2605 goto try_mount_again;
2601 } 2606 }
2602#else /* No DFS support, return error on mount */ 2607#else /* No DFS support, return error on mount */
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 75949d6a5f1b..6177f7cca16a 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26 /* 26 /*
27 * See Documentation/filesystems/Exporting 27 * See Documentation/filesystems/nfs/Exporting
28 * and examples in fs/exportfs 28 * and examples in fs/exportfs
29 * 29 *
30 * Since cifs is a network file system, an "fsid" must be included for 30 * Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/compat.c b/fs/compat.c
index 6c19040ffeef..00d90c2e66f0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -38,8 +38,6 @@
38#include <linux/dirent.h> 38#include <linux/dirent.h>
39#include <linux/fsnotify.h> 39#include <linux/fsnotify.h>
40#include <linux/highuid.h> 40#include <linux/highuid.h>
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/syscall.h> 41#include <linux/nfsd/syscall.h>
44#include <linux/personality.h> 42#include <linux/personality.h>
45#include <linux/rwsem.h> 43#include <linux/rwsem.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 14cbc831422a..c5c45de1a2ee 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1005,6 +1005,9 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) 1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) 1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
1007#endif 1007#endif
1008/* Big V (don't complain on serial console) */
1009IGNORE_IOCTL(VT_OPENQRY)
1010IGNORE_IOCTL(VT_GETMODE)
1008/* Little p (/dev/rtc, /dev/envctrl, etc.) */ 1011/* Little p (/dev/rtc, /dev/envctrl, etc.) */
1009COMPATIBLE_IOCTL(RTC_AIE_ON) 1012COMPATIBLE_IOCTL(RTC_AIE_ON)
1010COMPATIBLE_IOCTL(RTC_AIE_OFF) 1013COMPATIBLE_IOCTL(RTC_AIE_OFF)
@@ -1600,8 +1603,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1600 case KDSKBMETA: 1603 case KDSKBMETA:
1601 case KDSKBLED: 1604 case KDSKBLED:
1602 case KDSETLED: 1605 case KDSETLED:
1603 /* SG stuff */
1604 case SG_SET_TRANSFORM:
1605 /* AUTOFS */ 1606 /* AUTOFS */
1606 case AUTOFS_IOC_READY: 1607 case AUTOFS_IOC_READY:
1607 case AUTOFS_IOC_FAIL: 1608 case AUTOFS_IOC_FAIL:
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91d..32a5f46b1157 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(path); 122 path_put(path);
123 } 123 }
124 } else 124 } else {
125 ret = -EPERM; 125 ret = -EPERM;
126 path_put(path);
127 }
126 } 128 }
127 129
128 return ret; 130 return ret;
diff --git a/fs/dcache.c b/fs/dcache.c
index a100fa35a48f..953173a293a9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -978,6 +978,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
978 q.hash = full_name_hash(q.name, q.len); 978 q.hash = full_name_hash(q.name, q.len);
979 return d_alloc(parent, &q); 979 return d_alloc(parent, &q);
980} 980}
981EXPORT_SYMBOL(d_alloc_name);
981 982
982/* the caller must hold dcache_lock */ 983/* the caller must hold dcache_lock */
983static void __d_instantiate(struct dentry *dentry, struct inode *inode) 984static void __d_instantiate(struct dentry *dentry, struct inode *inode)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b912270942fa..e82adc2debb7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -53,13 +53,6 @@
53 * 53 *
54 * If blkfactor is zero then the user's request was aligned to the filesystem's 54 * If blkfactor is zero then the user's request was aligned to the filesystem's
55 * blocksize. 55 * blocksize.
56 *
57 * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
58 * This determines whether we need to do the fancy locking which prevents
59 * direct-IO from being able to read uninitialised disk blocks. If its zero
60 * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
61 * not held for the entire direct write (taken briefly, initially, during a
62 * direct read though, but its never held for the duration of a direct-IO).
63 */ 56 */
64 57
65struct dio { 58struct dio {
@@ -68,7 +61,7 @@ struct dio {
68 struct inode *inode; 61 struct inode *inode;
69 int rw; 62 int rw;
70 loff_t i_size; /* i_size when submitted */ 63 loff_t i_size; /* i_size when submitted */
71 int lock_type; /* doesn't change */ 64 int flags; /* doesn't change */
72 unsigned blkbits; /* doesn't change */ 65 unsigned blkbits; /* doesn't change */
73 unsigned blkfactor; /* When we're using an alignment which 66 unsigned blkfactor; /* When we're using an alignment which
74 is finer than the filesystem's soft 67 is finer than the filesystem's soft
@@ -104,6 +97,18 @@ struct dio {
104 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ 97 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
105 sector_t cur_page_block; /* Where it starts */ 98 sector_t cur_page_block; /* Where it starts */
106 99
100 /* BIO completion state */
101 spinlock_t bio_lock; /* protects BIO fields below */
102 unsigned long refcount; /* direct_io_worker() and bios */
103 struct bio *bio_list; /* singly linked via bi_private */
104 struct task_struct *waiter; /* waiting task (NULL if none) */
105
106 /* AIO related stuff */
107 struct kiocb *iocb; /* kiocb */
108 int is_async; /* is IO async ? */
109 int io_error; /* IO error in completion path */
110 ssize_t result; /* IO result */
111
107 /* 112 /*
108 * Page fetching state. These variables belong to dio_refill_pages(). 113 * Page fetching state. These variables belong to dio_refill_pages().
109 */ 114 */
@@ -115,22 +120,16 @@ struct dio {
115 * Page queue. These variables belong to dio_refill_pages() and 120 * Page queue. These variables belong to dio_refill_pages() and
116 * dio_get_page(). 121 * dio_get_page().
117 */ 122 */
118 struct page *pages[DIO_PAGES]; /* page buffer */
119 unsigned head; /* next page to process */ 123 unsigned head; /* next page to process */
120 unsigned tail; /* last valid page + 1 */ 124 unsigned tail; /* last valid page + 1 */
121 int page_errors; /* errno from get_user_pages() */ 125 int page_errors; /* errno from get_user_pages() */
122 126
123 /* BIO completion state */ 127 /*
124 spinlock_t bio_lock; /* protects BIO fields below */ 128 * pages[] (and any fields placed after it) are not zeroed out at
125 unsigned long refcount; /* direct_io_worker() and bios */ 129 * allocation time. Don't add new fields after pages[] unless you
126 struct bio *bio_list; /* singly linked via bi_private */ 130 * wish that they not be zeroed.
127 struct task_struct *waiter; /* waiting task (NULL if none) */ 131 */
128 132 struct page *pages[DIO_PAGES]; /* page buffer */
129 /* AIO related stuff */
130 struct kiocb *iocb; /* kiocb */
131 int is_async; /* is IO async ? */
132 int io_error; /* IO error in completion path */
133 ssize_t result; /* IO result */
134}; 133};
135 134
136/* 135/*
@@ -240,7 +239,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
240 if (dio->end_io && dio->result) 239 if (dio->end_io && dio->result)
241 dio->end_io(dio->iocb, offset, transferred, 240 dio->end_io(dio->iocb, offset, transferred,
242 dio->map_bh.b_private); 241 dio->map_bh.b_private);
243 if (dio->lock_type == DIO_LOCKING) 242
243 if (dio->flags & DIO_LOCKING)
244 /* lockdep: non-owner release */ 244 /* lockdep: non-owner release */
245 up_read_non_owner(&dio->inode->i_alloc_sem); 245 up_read_non_owner(&dio->inode->i_alloc_sem);
246 246
@@ -515,21 +515,24 @@ static int get_more_blocks(struct dio *dio)
515 map_bh->b_state = 0; 515 map_bh->b_state = 0;
516 map_bh->b_size = fs_count << dio->inode->i_blkbits; 516 map_bh->b_size = fs_count << dio->inode->i_blkbits;
517 517
518 /*
519 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
520 * forbid block creations: only overwrites are permitted.
521 * We will return early to the caller once we see an
522 * unmapped buffer head returned, and the caller will fall
523 * back to buffered I/O.
524 *
525 * Otherwise the decision is left to the get_blocks method,
526 * which may decide to handle it or also return an unmapped
527 * buffer head.
528 */
518 create = dio->rw & WRITE; 529 create = dio->rw & WRITE;
519 if (dio->lock_type == DIO_LOCKING) { 530 if (dio->flags & DIO_SKIP_HOLES) {
520 if (dio->block_in_file < (i_size_read(dio->inode) >> 531 if (dio->block_in_file < (i_size_read(dio->inode) >>
521 dio->blkbits)) 532 dio->blkbits))
522 create = 0; 533 create = 0;
523 } else if (dio->lock_type == DIO_NO_LOCKING) {
524 create = 0;
525 } 534 }
526 535
527 /*
528 * For writes inside i_size we forbid block creations: only
529 * overwrites are permitted. We fall back to buffered writes
530 * at a higher level for inside-i_size block-instantiating
531 * writes.
532 */
533 ret = (*dio->get_block)(dio->inode, fs_startblk, 536 ret = (*dio->get_block)(dio->inode, fs_startblk,
534 map_bh, create); 537 map_bh, create);
535 } 538 }
@@ -1039,7 +1042,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1039 * we can let i_mutex go now that its achieved its purpose 1042 * we can let i_mutex go now that its achieved its purpose
1040 * of protecting us from looking up uninitialized blocks. 1043 * of protecting us from looking up uninitialized blocks.
1041 */ 1044 */
1042 if ((rw == READ) && (dio->lock_type == DIO_LOCKING)) 1045 if (rw == READ && (dio->flags & DIO_LOCKING))
1043 mutex_unlock(&dio->inode->i_mutex); 1046 mutex_unlock(&dio->inode->i_mutex);
1044 1047
1045 /* 1048 /*
@@ -1086,30 +1089,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1086 1089
1087/* 1090/*
1088 * This is a library function for use by filesystem drivers. 1091 * This is a library function for use by filesystem drivers.
1089 * The locking rules are governed by the dio_lock_type parameter.
1090 * 1092 *
1091 * DIO_NO_LOCKING (no locking, for raw block device access) 1093 * The locking rules are governed by the flags parameter:
1092 * For writes, i_mutex is not held on entry; it is never taken. 1094 * - if the flags value contains DIO_LOCKING we use a fancy locking
1095 * scheme for dumb filesystems.
1096 * For writes this function is called under i_mutex and returns with
1097 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1098 * taken and dropped again before returning.
1099 * For reads and writes i_alloc_sem is taken in shared mode and released
1100 * on I/O completion (which may happen asynchronously after returning to
1101 * the caller).
1093 * 1102 *
1094 * DIO_LOCKING (simple locking for regular files) 1103 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1095 * For writes we are called under i_mutex and return with i_mutex held, even 1104 * internal locking but rather rely on the filesystem to synchronize
1096 * though it is internally dropped. 1105 * direct I/O reads/writes versus each other and truncate.
1097 * For reads, i_mutex is not held on entry, but it is taken and dropped before 1106 * For reads and writes both i_mutex and i_alloc_sem are not held on
1098 * returning. 1107 * entry and are never taken.
1099 *
1100 * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
1101 * uninitialised data, allowing parallel direct readers and writers)
1102 * For writes we are called without i_mutex, return without it, never touch it.
1103 * For reads we are called under i_mutex and return with i_mutex held, even
1104 * though it may be internally dropped.
1105 *
1106 * Additional i_alloc_sem locking requirements described inline below.
1107 */ 1108 */
1108ssize_t 1109ssize_t
1109__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1110__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1110 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1111 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1111 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1112 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1112 int dio_lock_type) 1113 int flags)
1113{ 1114{
1114 int seg; 1115 int seg;
1115 size_t size; 1116 size_t size;
@@ -1120,8 +1121,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1120 ssize_t retval = -EINVAL; 1121 ssize_t retval = -EINVAL;
1121 loff_t end = offset; 1122 loff_t end = offset;
1122 struct dio *dio; 1123 struct dio *dio;
1123 int release_i_mutex = 0;
1124 int acquire_i_mutex = 0;
1125 1124
1126 if (rw & WRITE) 1125 if (rw & WRITE)
1127 rw = WRITE_ODIRECT_PLUG; 1126 rw = WRITE_ODIRECT_PLUG;
@@ -1151,48 +1150,41 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1151 } 1150 }
1152 } 1151 }
1153 1152
1154 dio = kzalloc(sizeof(*dio), GFP_KERNEL); 1153 dio = kmalloc(sizeof(*dio), GFP_KERNEL);
1155 retval = -ENOMEM; 1154 retval = -ENOMEM;
1156 if (!dio) 1155 if (!dio)
1157 goto out; 1156 goto out;
1158
1159 /* 1157 /*
1160 * For block device access DIO_NO_LOCKING is used, 1158 * Believe it or not, zeroing out the page array caused a .5%
1161 * neither readers nor writers do any locking at all 1159 * performance regression in a database benchmark. So, we take
1162 * For regular files using DIO_LOCKING, 1160 * care to only zero out what's needed.
1163 * readers need to grab i_mutex and i_alloc_sem
1164 * writers need to grab i_alloc_sem only (i_mutex is already held)
1165 * For regular files using DIO_OWN_LOCKING,
1166 * neither readers nor writers take any locks here
1167 */ 1161 */
1168 dio->lock_type = dio_lock_type; 1162 memset(dio, 0, offsetof(struct dio, pages));
1169 if (dio_lock_type != DIO_NO_LOCKING) { 1163
1164 dio->flags = flags;
1165 if (dio->flags & DIO_LOCKING) {
1170 /* watch out for a 0 len io from a tricksy fs */ 1166 /* watch out for a 0 len io from a tricksy fs */
1171 if (rw == READ && end > offset) { 1167 if (rw == READ && end > offset) {
1172 struct address_space *mapping; 1168 struct address_space *mapping =
1169 iocb->ki_filp->f_mapping;
1173 1170
1174 mapping = iocb->ki_filp->f_mapping; 1171 /* will be released by direct_io_worker */
1175 if (dio_lock_type != DIO_OWN_LOCKING) { 1172 mutex_lock(&inode->i_mutex);
1176 mutex_lock(&inode->i_mutex);
1177 release_i_mutex = 1;
1178 }
1179 1173
1180 retval = filemap_write_and_wait_range(mapping, offset, 1174 retval = filemap_write_and_wait_range(mapping, offset,
1181 end - 1); 1175 end - 1);
1182 if (retval) { 1176 if (retval) {
1177 mutex_unlock(&inode->i_mutex);
1183 kfree(dio); 1178 kfree(dio);
1184 goto out; 1179 goto out;
1185 } 1180 }
1186
1187 if (dio_lock_type == DIO_OWN_LOCKING) {
1188 mutex_unlock(&inode->i_mutex);
1189 acquire_i_mutex = 1;
1190 }
1191 } 1181 }
1192 1182
1193 if (dio_lock_type == DIO_LOCKING) 1183 /*
1194 /* lockdep: not the owner will release it */ 1184 * Will be released at I/O completion, possibly in a
1195 down_read_non_owner(&inode->i_alloc_sem); 1185 * different thread.
1186 */
1187 down_read_non_owner(&inode->i_alloc_sem);
1196 } 1188 }
1197 1189
1198 /* 1190 /*
@@ -1210,24 +1202,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1210 /* 1202 /*
1211 * In case of error extending write may have instantiated a few 1203 * In case of error extending write may have instantiated a few
1212 * blocks outside i_size. Trim these off again for DIO_LOCKING. 1204 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1213 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by 1205 *
1214 * it's own meaner. 1206 * NOTE: filesystems with their own locking have to handle this
1207 * on their own.
1215 */ 1208 */
1216 if (unlikely(retval < 0 && (rw & WRITE))) { 1209 if (flags & DIO_LOCKING) {
1217 loff_t isize = i_size_read(inode); 1210 if (unlikely((rw & WRITE) && retval < 0)) {
1218 1211 loff_t isize = i_size_read(inode);
1219 if (end > isize && dio_lock_type == DIO_LOCKING) 1212 if (end > isize)
1220 vmtruncate(inode, isize); 1213 vmtruncate(inode, isize);
1214 }
1221 } 1215 }
1222 1216
1223 if (rw == READ && dio_lock_type == DIO_LOCKING)
1224 release_i_mutex = 0;
1225
1226out: 1217out:
1227 if (release_i_mutex)
1228 mutex_unlock(&inode->i_mutex);
1229 else if (acquire_i_mutex)
1230 mutex_lock(&inode->i_mutex);
1231 return retval; 1218 return retval;
1232} 1219}
1233EXPORT_SYMBOL(__blockdev_direct_IO); 1220EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index fbb6e5eed697..7cb0a59f4b9d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1748,7 +1748,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1748 char *cipher_name, size_t *key_size) 1748 char *cipher_name, size_t *key_size)
1749{ 1749{
1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; 1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
1751 char *full_alg_name; 1751 char *full_alg_name = NULL;
1752 int rc; 1752 int rc;
1753 1753
1754 *key_tfm = NULL; 1754 *key_tfm = NULL;
@@ -1763,7 +1763,6 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1763 if (rc) 1763 if (rc)
1764 goto out; 1764 goto out;
1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC); 1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
1766 kfree(full_alg_name);
1767 if (IS_ERR(*key_tfm)) { 1766 if (IS_ERR(*key_tfm)) {
1768 rc = PTR_ERR(*key_tfm); 1767 rc = PTR_ERR(*key_tfm);
1769 printk(KERN_ERR "Unable to allocate crypto cipher with name " 1768 printk(KERN_ERR "Unable to allocate crypto cipher with name "
@@ -1786,6 +1785,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1786 goto out; 1785 goto out;
1787 } 1786 }
1788out: 1787out:
1788 kfree(full_alg_name);
1789 return rc; 1789 return rc;
1790} 1790}
1791 1791
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75bc..8f006a0d6076 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
62 struct inode *lower_inode = 62 struct inode *lower_inode =
63 ecryptfs_inode_to_lower(dentry->d_inode); 63 ecryptfs_inode_to_lower(dentry->d_inode);
64 64
65 fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); 65 fsstack_copy_attr_all(dentry->d_inode, lower_inode);
66 } 66 }
67out: 67out:
68 return rc; 68 return rc;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9e944057001b..678172b61be2 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -158,7 +158,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
158 struct dentry *ecryptfs_dentry = file->f_path.dentry; 158 struct dentry *ecryptfs_dentry = file->f_path.dentry;
159 /* Private value of ecryptfs_dentry allocated in 159 /* Private value of ecryptfs_dentry allocated in
160 * ecryptfs_lookup() */ 160 * ecryptfs_lookup() */
161 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 161 struct dentry *lower_dentry;
162 struct ecryptfs_file_info *file_info; 162 struct ecryptfs_file_info *file_info;
163 163
164 mount_crypt_stat = &ecryptfs_superblock_to_private( 164 mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -191,13 +191,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
191 | ECRYPTFS_ENCRYPTED); 191 | ECRYPTFS_ENCRYPTED);
192 } 192 }
193 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
194 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
195 && !(file->f_flags & O_RDONLY)) {
196 rc = -EPERM;
197 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
198 "file must hence be opened RO\n", __func__);
199 goto out;
200 }
201 if (!ecryptfs_inode_to_private(inode)->lower_file) { 194 if (!ecryptfs_inode_to_private(inode)->lower_file) {
202 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 195 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
203 if (rc) { 196 if (rc) {
@@ -208,6 +201,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
208 goto out; 201 goto out;
209 } 202 }
210 } 203 }
204 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
205 && !(file->f_flags & O_RDONLY)) {
206 rc = -EPERM;
207 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
208 "file must hence be opened RO\n", __func__);
209 goto out;
210 }
211 ecryptfs_set_file_lower( 211 ecryptfs_set_file_lower(
212 file, ecryptfs_inode_to_private(inode)->lower_file); 212 file, ecryptfs_inode_to_private(inode)->lower_file);
213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
@@ -299,7 +299,6 @@ static int ecryptfs_ioctl(struct inode *inode, struct file *file,
299const struct file_operations ecryptfs_dir_fops = { 299const struct file_operations ecryptfs_dir_fops = {
300 .readdir = ecryptfs_readdir, 300 .readdir = ecryptfs_readdir,
301 .ioctl = ecryptfs_ioctl, 301 .ioctl = ecryptfs_ioctl,
302 .mmap = generic_file_mmap,
303 .open = ecryptfs_open, 302 .open = ecryptfs_open,
304 .flush = ecryptfs_flush, 303 .flush = ecryptfs_flush,
305 .release = ecryptfs_release, 304 .release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0de..4a430ab4115c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -282,7 +282,8 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
282 goto out; 282 goto out;
283 } 283 }
284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
285 ecryptfs_dir_inode->i_sb, 1); 285 ecryptfs_dir_inode->i_sb,
286 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
286 if (rc) { 287 if (rc) {
287 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", 288 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
288 __func__, rc); 289 __func__, rc);
@@ -463,9 +464,6 @@ out_lock:
463 unlock_dir(lower_dir_dentry); 464 unlock_dir(lower_dir_dentry);
464 dput(lower_new_dentry); 465 dput(lower_new_dentry);
465 dput(lower_old_dentry); 466 dput(lower_old_dentry);
466 d_drop(lower_old_dentry);
467 d_drop(new_dentry);
468 d_drop(old_dentry);
469 return rc; 467 return rc;
470} 468}
471 469
@@ -614,6 +612,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
614 struct dentry *lower_new_dentry; 612 struct dentry *lower_new_dentry;
615 struct dentry *lower_old_dir_dentry; 613 struct dentry *lower_old_dir_dentry;
616 struct dentry *lower_new_dir_dentry; 614 struct dentry *lower_new_dir_dentry;
615 struct dentry *trap = NULL;
617 616
618 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 617 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
619 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 618 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -621,14 +620,24 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
621 dget(lower_new_dentry); 620 dget(lower_new_dentry);
622 lower_old_dir_dentry = dget_parent(lower_old_dentry); 621 lower_old_dir_dentry = dget_parent(lower_old_dentry);
623 lower_new_dir_dentry = dget_parent(lower_new_dentry); 622 lower_new_dir_dentry = dget_parent(lower_new_dentry);
624 lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 623 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
624 /* source should not be ancestor of target */
625 if (trap == lower_old_dentry) {
626 rc = -EINVAL;
627 goto out_lock;
628 }
629 /* target should not be ancestor of source */
630 if (trap == lower_new_dentry) {
631 rc = -ENOTEMPTY;
632 goto out_lock;
633 }
625 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, 634 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
626 lower_new_dir_dentry->d_inode, lower_new_dentry); 635 lower_new_dir_dentry->d_inode, lower_new_dentry);
627 if (rc) 636 if (rc)
628 goto out_lock; 637 goto out_lock;
629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); 638 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
630 if (new_dir != old_dir) 639 if (new_dir != old_dir)
631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); 640 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
632out_lock: 641out_lock:
633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 642 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
634 dput(lower_new_dentry->d_parent); 643 dput(lower_new_dentry->d_parent);
@@ -715,31 +724,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
715 /* Released in ecryptfs_put_link(); only release here on error */ 724 /* Released in ecryptfs_put_link(); only release here on error */
716 buf = kmalloc(len, GFP_KERNEL); 725 buf = kmalloc(len, GFP_KERNEL);
717 if (!buf) { 726 if (!buf) {
718 rc = -ENOMEM; 727 buf = ERR_PTR(-ENOMEM);
719 goto out; 728 goto out;
720 } 729 }
721 old_fs = get_fs(); 730 old_fs = get_fs();
722 set_fs(get_ds()); 731 set_fs(get_ds());
723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 732 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
724 set_fs(old_fs); 733 set_fs(old_fs);
725 if (rc < 0) 734 if (rc < 0) {
726 goto out_free; 735 kfree(buf);
727 else 736 buf = ERR_PTR(rc);
737 } else
728 buf[rc] = '\0'; 738 buf[rc] = '\0';
729 rc = 0;
730 nd_set_link(nd, buf);
731 goto out;
732out_free:
733 kfree(buf);
734out: 739out:
735 return ERR_PTR(rc); 740 nd_set_link(nd, buf);
741 return NULL;
736} 742}
737 743
738static void 744static void
739ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) 745ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
740{ 746{
741 /* Free the char* */ 747 char *buf = nd_get_link(nd);
742 kfree(nd_get_link(nd)); 748 if (!IS_ERR(buf)) {
749 /* Free the char* */
750 kfree(buf);
751 }
743} 752}
744 753
745/** 754/**
@@ -772,18 +781,23 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
772} 781}
773 782
774/** 783/**
775 * ecryptfs_truncate 784 * truncate_upper
776 * @dentry: The ecryptfs layer dentry 785 * @dentry: The ecryptfs layer dentry
777 * @new_length: The length to expand the file to 786 * @ia: Address of the ecryptfs inode's attributes
787 * @lower_ia: Address of the lower inode's attributes
778 * 788 *
779 * Function to handle truncations modifying the size of the file. Note 789 * Function to handle truncations modifying the size of the file. Note
780 * that the file sizes are interpolated. When expanding, we are simply 790 * that the file sizes are interpolated. When expanding, we are simply
781 * writing strings of 0's out. When truncating, we need to modify the 791 * writing strings of 0's out. When truncating, we truncate the upper
782 * underlying file size according to the page index interpolations. 792 * inode and update the lower_ia according to the page index
793 * interpolations. If ATTR_SIZE is set in lower_ia->ia_valid upon return,
794 * the caller must use lower_ia in a call to notify_change() to perform
795 * the truncation of the lower inode.
783 * 796 *
784 * Returns zero on success; non-zero otherwise 797 * Returns zero on success; non-zero otherwise
785 */ 798 */
786int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) 799static int truncate_upper(struct dentry *dentry, struct iattr *ia,
800 struct iattr *lower_ia)
787{ 801{
788 int rc = 0; 802 int rc = 0;
789 struct inode *inode = dentry->d_inode; 803 struct inode *inode = dentry->d_inode;
@@ -794,8 +808,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
794 loff_t lower_size_before_truncate; 808 loff_t lower_size_before_truncate;
795 loff_t lower_size_after_truncate; 809 loff_t lower_size_after_truncate;
796 810
797 if (unlikely((new_length == i_size))) 811 if (unlikely((ia->ia_size == i_size))) {
812 lower_ia->ia_valid &= ~ATTR_SIZE;
798 goto out; 813 goto out;
814 }
799 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 815 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
800 /* Set up a fake ecryptfs file, this is used to interface with 816 /* Set up a fake ecryptfs file, this is used to interface with
801 * the file in the underlying filesystem so that the 817 * the file in the underlying filesystem so that the
@@ -815,28 +831,30 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
815 &fake_ecryptfs_file, 831 &fake_ecryptfs_file,
816 ecryptfs_inode_to_private(dentry->d_inode)->lower_file); 832 ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
817 /* Switch on growing or shrinking file */ 833 /* Switch on growing or shrinking file */
818 if (new_length > i_size) { 834 if (ia->ia_size > i_size) {
819 char zero[] = { 0x00 }; 835 char zero[] = { 0x00 };
820 836
837 lower_ia->ia_valid &= ~ATTR_SIZE;
821 /* Write a single 0 at the last position of the file; 838 /* Write a single 0 at the last position of the file;
822 * this triggers code that will fill in 0's throughout 839 * this triggers code that will fill in 0's throughout
823 * the intermediate portion of the previous end of the 840 * the intermediate portion of the previous end of the
824 * file and the new and of the file */ 841 * file and the new and of the file */
825 rc = ecryptfs_write(&fake_ecryptfs_file, zero, 842 rc = ecryptfs_write(&fake_ecryptfs_file, zero,
826 (new_length - 1), 1); 843 (ia->ia_size - 1), 1);
827 } else { /* new_length < i_size_read(inode) */ 844 } else { /* ia->ia_size < i_size_read(inode) */
828 /* We're chopping off all the pages down do the page 845 /* We're chopping off all the pages down to the page
829 * in which new_length is located. Fill in the end of 846 * in which ia->ia_size is located. Fill in the end of
830 * that page from (new_length & ~PAGE_CACHE_MASK) to 847 * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
831 * PAGE_CACHE_SIZE with zeros. */ 848 * PAGE_CACHE_SIZE with zeros. */
832 size_t num_zeros = (PAGE_CACHE_SIZE 849 size_t num_zeros = (PAGE_CACHE_SIZE
833 - (new_length & ~PAGE_CACHE_MASK)); 850 - (ia->ia_size & ~PAGE_CACHE_MASK));
834 851
835 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 852 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
836 rc = vmtruncate(inode, new_length); 853 rc = vmtruncate(inode, ia->ia_size);
837 if (rc) 854 if (rc)
838 goto out_free; 855 goto out_free;
839 rc = vmtruncate(lower_dentry->d_inode, new_length); 856 lower_ia->ia_size = ia->ia_size;
857 lower_ia->ia_valid |= ATTR_SIZE;
840 goto out_free; 858 goto out_free;
841 } 859 }
842 if (num_zeros) { 860 if (num_zeros) {
@@ -848,7 +866,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
848 goto out_free; 866 goto out_free;
849 } 867 }
850 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, 868 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
851 new_length, num_zeros); 869 ia->ia_size, num_zeros);
852 kfree(zeros_virt); 870 kfree(zeros_virt);
853 if (rc) { 871 if (rc) {
854 printk(KERN_ERR "Error attempting to zero out " 872 printk(KERN_ERR "Error attempting to zero out "
@@ -857,7 +875,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
857 goto out_free; 875 goto out_free;
858 } 876 }
859 } 877 }
860 vmtruncate(inode, new_length); 878 vmtruncate(inode, ia->ia_size);
861 rc = ecryptfs_write_inode_size_to_metadata(inode); 879 rc = ecryptfs_write_inode_size_to_metadata(inode);
862 if (rc) { 880 if (rc) {
863 printk(KERN_ERR "Problem with " 881 printk(KERN_ERR "Problem with "
@@ -870,10 +888,12 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
870 lower_size_before_truncate = 888 lower_size_before_truncate =
871 upper_size_to_lower_size(crypt_stat, i_size); 889 upper_size_to_lower_size(crypt_stat, i_size);
872 lower_size_after_truncate = 890 lower_size_after_truncate =
873 upper_size_to_lower_size(crypt_stat, new_length); 891 upper_size_to_lower_size(crypt_stat, ia->ia_size);
874 if (lower_size_after_truncate < lower_size_before_truncate) 892 if (lower_size_after_truncate < lower_size_before_truncate) {
875 vmtruncate(lower_dentry->d_inode, 893 lower_ia->ia_size = lower_size_after_truncate;
876 lower_size_after_truncate); 894 lower_ia->ia_valid |= ATTR_SIZE;
895 } else
896 lower_ia->ia_valid &= ~ATTR_SIZE;
877 } 897 }
878out_free: 898out_free:
879 if (ecryptfs_file_to_private(&fake_ecryptfs_file)) 899 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
@@ -883,6 +903,33 @@ out:
883 return rc; 903 return rc;
884} 904}
885 905
906/**
907 * ecryptfs_truncate
908 * @dentry: The ecryptfs layer dentry
909 * @new_length: The length to expand the file to
910 *
911 * Simple function that handles the truncation of an eCryptfs inode and
912 * its corresponding lower inode.
913 *
914 * Returns zero on success; non-zero otherwise
915 */
916int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
917{
918 struct iattr ia = { .ia_valid = ATTR_SIZE, .ia_size = new_length };
919 struct iattr lower_ia = { .ia_valid = 0 };
920 int rc;
921
922 rc = truncate_upper(dentry, &ia, &lower_ia);
923 if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
924 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
925
926 mutex_lock(&lower_dentry->d_inode->i_mutex);
927 rc = notify_change(lower_dentry, &lower_ia);
928 mutex_unlock(&lower_dentry->d_inode->i_mutex);
929 }
930 return rc;
931}
932
886static int 933static int
887ecryptfs_permission(struct inode *inode, int mask) 934ecryptfs_permission(struct inode *inode, int mask)
888{ 935{
@@ -905,6 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
905{ 952{
906 int rc = 0; 953 int rc = 0;
907 struct dentry *lower_dentry; 954 struct dentry *lower_dentry;
955 struct iattr lower_ia;
908 struct inode *inode; 956 struct inode *inode;
909 struct inode *lower_inode; 957 struct inode *lower_inode;
910 struct ecryptfs_crypt_stat *crypt_stat; 958 struct ecryptfs_crypt_stat *crypt_stat;
@@ -943,15 +991,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
943 } 991 }
944 } 992 }
945 mutex_unlock(&crypt_stat->cs_mutex); 993 mutex_unlock(&crypt_stat->cs_mutex);
994 memcpy(&lower_ia, ia, sizeof(lower_ia));
995 if (ia->ia_valid & ATTR_FILE)
996 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
946 if (ia->ia_valid & ATTR_SIZE) { 997 if (ia->ia_valid & ATTR_SIZE) {
947 ecryptfs_printk(KERN_DEBUG, 998 rc = truncate_upper(dentry, ia, &lower_ia);
948 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
949 ia->ia_valid, ATTR_SIZE);
950 rc = ecryptfs_truncate(dentry, ia->ia_size);
951 /* ecryptfs_truncate handles resizing of the lower file */
952 ia->ia_valid &= ~ATTR_SIZE;
953 ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
954 ia->ia_valid);
955 if (rc < 0) 999 if (rc < 0)
956 goto out; 1000 goto out;
957 } 1001 }
@@ -960,14 +1004,29 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
960 * mode change is for clearing setuid/setgid bits. Allow lower fs 1004 * mode change is for clearing setuid/setgid bits. Allow lower fs
961 * to interpret this in its own way. 1005 * to interpret this in its own way.
962 */ 1006 */
963 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 1007 if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
964 ia->ia_valid &= ~ATTR_MODE; 1008 lower_ia.ia_valid &= ~ATTR_MODE;
965 1009
966 mutex_lock(&lower_dentry->d_inode->i_mutex); 1010 mutex_lock(&lower_dentry->d_inode->i_mutex);
967 rc = notify_change(lower_dentry, ia); 1011 rc = notify_change(lower_dentry, &lower_ia);
968 mutex_unlock(&lower_dentry->d_inode->i_mutex); 1012 mutex_unlock(&lower_dentry->d_inode->i_mutex);
969out: 1013out:
970 fsstack_copy_attr_all(inode, lower_inode, NULL); 1014 fsstack_copy_attr_all(inode, lower_inode);
1015 return rc;
1016}
1017
1018int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1019 struct kstat *stat)
1020{
1021 struct kstat lower_stat;
1022 int rc;
1023
1024 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1025 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1026 if (!rc) {
1027 generic_fillattr(dentry->d_inode, stat);
1028 stat->blocks = lower_stat.blocks;
1029 }
971 return rc; 1030 return rc;
972} 1031}
973 1032
@@ -1100,6 +1159,7 @@ const struct inode_operations ecryptfs_dir_iops = {
1100const struct inode_operations ecryptfs_main_iops = { 1159const struct inode_operations ecryptfs_main_iops = {
1101 .permission = ecryptfs_permission, 1160 .permission = ecryptfs_permission,
1102 .setattr = ecryptfs_setattr, 1161 .setattr = ecryptfs_setattr,
1162 .getattr = ecryptfs_getattr,
1103 .setxattr = ecryptfs_setxattr, 1163 .setxattr = ecryptfs_setxattr,
1104 .getxattr = ecryptfs_getxattr, 1164 .getxattr = ecryptfs_getxattr,
1105 .listxattr = ecryptfs_listxattr, 1165 .listxattr = ecryptfs_listxattr,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c6ac85d6c701..ea2f92101dfe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -35,7 +35,6 @@
35#include <linux/key.h> 35#include <linux/key.h>
36#include <linux/parser.h> 36#include <linux/parser.h>
37#include <linux/fs_stack.h> 37#include <linux/fs_stack.h>
38#include <linux/ima.h>
39#include "ecryptfs_kernel.h" 38#include "ecryptfs_kernel.h"
40 39
41/** 40/**
@@ -119,7 +118,6 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
119 const struct cred *cred = current_cred(); 118 const struct cred *cred = current_cred();
120 struct ecryptfs_inode_info *inode_info = 119 struct ecryptfs_inode_info *inode_info =
121 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 120 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
122 int opened_lower_file = 0;
123 int rc = 0; 121 int rc = 0;
124 122
125 mutex_lock(&inode_info->lower_file_mutex); 123 mutex_lock(&inode_info->lower_file_mutex);
@@ -136,12 +134,9 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
136 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 134 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
137 "rc = [%d]\n", lower_dentry, lower_mnt, rc); 135 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
138 inode_info->lower_file = NULL; 136 inode_info->lower_file = NULL;
139 } else 137 }
140 opened_lower_file = 1;
141 } 138 }
142 mutex_unlock(&inode_info->lower_file_mutex); 139 mutex_unlock(&inode_info->lower_file_mutex);
143 if (opened_lower_file)
144 ima_counts_get(inode_info->lower_file);
145 return rc; 140 return rc;
146} 141}
147 142
@@ -194,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
194 init_special_inode(inode, lower_inode->i_mode, 189 init_special_inode(inode, lower_inode->i_mode,
195 lower_inode->i_rdev); 190 lower_inode->i_rdev);
196 dentry->d_op = &ecryptfs_dops; 191 dentry->d_op = &ecryptfs_dops;
197 fsstack_copy_attr_all(inode, lower_inode, NULL); 192 fsstack_copy_attr_all(inode, lower_inode);
198 /* This size will be overwritten for real files w/ headers and 193 /* This size will be overwritten for real files w/ headers and
199 * other metadata */ 194 * other metadata */
200 fsstack_copy_inode_size(inode, lower_inode); 195 fsstack_copy_inode_size(inode, lower_inode);
@@ -590,8 +585,8 @@ out:
590 * with as much information as it can before needing 585 * with as much information as it can before needing
591 * the lower filesystem. 586 * the lower filesystem.
592 * ecryptfs_read_super(): this accesses the lower filesystem and uses 587 * ecryptfs_read_super(): this accesses the lower filesystem and uses
593 * ecryptfs_interpolate to perform most of the linking 588 * ecryptfs_interpose to perform most of the linking
594 * ecryptfs_interpolate(): links the lower filesystem into ecryptfs 589 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
595 */ 590 */
596static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, 591static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
597 const char *dev_name, void *raw_data, 592 const char *dev_name, void *raw_data,
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8b47e4200e65..d26402ff06ea 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -339,7 +339,7 @@ struct file *eventfd_file_create(unsigned int count, int flags)
339 ctx->flags = flags; 339 ctx->flags = flags;
340 340
341 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, 341 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
342 flags & EFD_SHARED_FCNTL_FLAGS); 342 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
343 if (IS_ERR(file)) 343 if (IS_ERR(file))
344 eventfd_free_ctx(ctx); 344 eventfd_free_ctx(ctx);
345 345
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 366c503f9657..bd056a5b4efc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1206,7 +1206,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1206 * a file structure and a free file descriptor. 1206 * a file structure and a free file descriptor.
1207 */ 1207 */
1208 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1208 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1209 flags & O_CLOEXEC); 1209 O_RDWR | (flags & O_CLOEXEC));
1210 if (error < 0) 1210 if (error < 0)
1211 ep_free(ep); 1211 ep_free(ep);
1212 1212
diff --git a/fs/exec.c b/fs/exec.c
index c0c636e34f60..632b02e34ec7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -826,7 +826,9 @@ static int de_thread(struct task_struct *tsk)
826 attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); 826 attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
827 transfer_pid(leader, tsk, PIDTYPE_PGID); 827 transfer_pid(leader, tsk, PIDTYPE_PGID);
828 transfer_pid(leader, tsk, PIDTYPE_SID); 828 transfer_pid(leader, tsk, PIDTYPE_SID);
829
829 list_replace_rcu(&leader->tasks, &tsk->tasks); 830 list_replace_rcu(&leader->tasks, &tsk->tasks);
831 list_replace_init(&leader->sibling, &tsk->sibling);
830 832
831 tsk->group_leader = tsk; 833 tsk->group_leader = tsk;
832 leader->group_leader = tsk; 834 leader->group_leader = tsk;
@@ -923,6 +925,15 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
923void set_task_comm(struct task_struct *tsk, char *buf) 925void set_task_comm(struct task_struct *tsk, char *buf)
924{ 926{
925 task_lock(tsk); 927 task_lock(tsk);
928
929 /*
930 * Threads may access current->comm without holding
931 * the task lock, so write the string carefully.
932 * Readers without a lock may see incomplete new
933 * names but are safe from non-terminating string reads.
934 */
935 memset(tsk->comm, 0, TASK_COMM_LEN);
936 wmb();
926 strlcpy(tsk->comm, buf, sizeof(tsk->comm)); 937 strlcpy(tsk->comm, buf, sizeof(tsk->comm));
927 task_unlock(tsk); 938 task_unlock(tsk);
928 perf_event_comm(tsk); 939 perf_event_comm(tsk);
@@ -1752,17 +1763,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1752 struct mm_struct *mm = current->mm; 1763 struct mm_struct *mm = current->mm;
1753 struct linux_binfmt * binfmt; 1764 struct linux_binfmt * binfmt;
1754 struct inode * inode; 1765 struct inode * inode;
1755 struct file * file;
1756 const struct cred *old_cred; 1766 const struct cred *old_cred;
1757 struct cred *cred; 1767 struct cred *cred;
1758 int retval = 0; 1768 int retval = 0;
1759 int flag = 0; 1769 int flag = 0;
1760 int ispipe = 0; 1770 int ispipe = 0;
1761 unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1762 char **helper_argv = NULL; 1771 char **helper_argv = NULL;
1763 int helper_argc = 0; 1772 int helper_argc = 0;
1764 int dump_count = 0; 1773 int dump_count = 0;
1765 static atomic_t core_dump_count = ATOMIC_INIT(0); 1774 static atomic_t core_dump_count = ATOMIC_INIT(0);
1775 struct coredump_params cprm = {
1776 .signr = signr,
1777 .regs = regs,
1778 .limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
1779 };
1766 1780
1767 audit_core_dumps(signr); 1781 audit_core_dumps(signr);
1768 1782
@@ -1818,15 +1832,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1818 ispipe = format_corename(corename, signr); 1832 ispipe = format_corename(corename, signr);
1819 unlock_kernel(); 1833 unlock_kernel();
1820 1834
1821 if ((!ispipe) && (core_limit < binfmt->min_coredump)) 1835 if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
1822 goto fail_unlock; 1836 goto fail_unlock;
1823 1837
1824 if (ispipe) { 1838 if (ispipe) {
1825 if (core_limit == 0) { 1839 if (cprm.limit == 0) {
1826 /* 1840 /*
1827 * Normally core limits are irrelevant to pipes, since 1841 * Normally core limits are irrelevant to pipes, since
1828 * we're not writing to the file system, but we use 1842 * we're not writing to the file system, but we use
1829 * core_limit of 0 here as a speacial value. Any 1843 * cprm.limit of 0 here as a speacial value. Any
1830 * non-zero limit gets set to RLIM_INFINITY below, but 1844 * non-zero limit gets set to RLIM_INFINITY below, but
1831 * a limit of 0 skips the dump. This is a consistent 1845 * a limit of 0 skips the dump. This is a consistent
1832 * way to catch recursive crashes. We can still crash 1846 * way to catch recursive crashes. We can still crash
@@ -1859,25 +1873,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1859 goto fail_dropcount; 1873 goto fail_dropcount;
1860 } 1874 }
1861 1875
1862 core_limit = RLIM_INFINITY; 1876 cprm.limit = RLIM_INFINITY;
1863 1877
1864 /* SIGPIPE can happen, but it's just never processed */ 1878 /* SIGPIPE can happen, but it's just never processed */
1865 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, 1879 if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
1866 &file)) { 1880 &cprm.file)) {
1867 printk(KERN_INFO "Core dump to %s pipe failed\n", 1881 printk(KERN_INFO "Core dump to %s pipe failed\n",
1868 corename); 1882 corename);
1869 goto fail_dropcount; 1883 goto fail_dropcount;
1870 } 1884 }
1871 } else 1885 } else
1872 file = filp_open(corename, 1886 cprm.file = filp_open(corename,
1873 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 1887 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
1874 0600); 1888 0600);
1875 if (IS_ERR(file)) 1889 if (IS_ERR(cprm.file))
1876 goto fail_dropcount; 1890 goto fail_dropcount;
1877 inode = file->f_path.dentry->d_inode; 1891 inode = cprm.file->f_path.dentry->d_inode;
1878 if (inode->i_nlink > 1) 1892 if (inode->i_nlink > 1)
1879 goto close_fail; /* multiple links - don't dump */ 1893 goto close_fail; /* multiple links - don't dump */
1880 if (!ispipe && d_unhashed(file->f_path.dentry)) 1894 if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
1881 goto close_fail; 1895 goto close_fail;
1882 1896
1883 /* AK: actually i see no reason to not allow this for named pipes etc., 1897 /* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1890,21 +1904,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1890 */ 1904 */
1891 if (inode->i_uid != current_fsuid()) 1905 if (inode->i_uid != current_fsuid())
1892 goto close_fail; 1906 goto close_fail;
1893 if (!file->f_op) 1907 if (!cprm.file->f_op)
1894 goto close_fail; 1908 goto close_fail;
1895 if (!file->f_op->write) 1909 if (!cprm.file->f_op->write)
1896 goto close_fail; 1910 goto close_fail;
1897 if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0) 1911 if (!ispipe &&
1912 do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
1898 goto close_fail; 1913 goto close_fail;
1899 1914
1900 retval = binfmt->core_dump(signr, regs, file, core_limit); 1915 retval = binfmt->core_dump(&cprm);
1901 1916
1902 if (retval) 1917 if (retval)
1903 current->signal->group_exit_code |= 0x80; 1918 current->signal->group_exit_code |= 0x80;
1904close_fail: 1919close_fail:
1905 if (ispipe && core_pipe_limit) 1920 if (ispipe && core_pipe_limit)
1906 wait_for_dump_helpers(file); 1921 wait_for_dump_helpers(cprm.file);
1907 filp_close(file, NULL); 1922 filp_close(cprm.file, NULL);
1908fail_dropcount: 1923fail_dropcount:
1909 if (dump_count) 1924 if (dump_count)
1910 atomic_dec(&core_dump_count); 1925 atomic_dec(&core_dump_count);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 698a8636d39c..2afbcebeda71 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -738,13 +738,28 @@ static int exofs_write_begin_export(struct file *file,
738 fsdata); 738 fsdata);
739} 739}
740 740
741static int exofs_write_end(struct file *file, struct address_space *mapping,
742 loff_t pos, unsigned len, unsigned copied,
743 struct page *page, void *fsdata)
744{
745 struct inode *inode = mapping->host;
746 /* According to comment in simple_write_end i_mutex is held */
747 loff_t i_size = inode->i_size;
748 int ret;
749
750 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
751 if (i_size != inode->i_size)
752 mark_inode_dirty(inode);
753 return ret;
754}
755
741const struct address_space_operations exofs_aops = { 756const struct address_space_operations exofs_aops = {
742 .readpage = exofs_readpage, 757 .readpage = exofs_readpage,
743 .readpages = exofs_readpages, 758 .readpages = exofs_readpages,
744 .writepage = exofs_writepage, 759 .writepage = exofs_writepage,
745 .writepages = exofs_writepages, 760 .writepages = exofs_writepages,
746 .write_begin = exofs_write_begin_export, 761 .write_begin = exofs_write_begin_export,
747 .write_end = simple_write_end, 762 .write_end = exofs_write_end,
748}; 763};
749 764
750/****************************************************************************** 765/******************************************************************************
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
index 423033addd1f..c52e9888b8ab 100644
--- a/fs/exofs/pnfs.h
+++ b/fs/exofs/pnfs.h
@@ -15,13 +15,7 @@
15#ifndef __EXOFS_PNFS_H__ 15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__ 16#define __EXOFS_PNFS_H__
17 17
18#if defined(CONFIG_PNFS) 18#if ! defined(__PNFS_OSD_XDR_H__)
19
20
21/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
22#include "../nfs/objlayout/pnfs_osd_xdr.h"
23
24#else /* defined(CONFIG_PNFS) */
25 19
26enum pnfs_iomode { 20enum pnfs_iomode {
27 IOMODE_READ = 1, 21 IOMODE_READ = 1,
@@ -46,6 +40,6 @@ struct pnfs_osd_data_map {
46 u32 odm_raid_algorithm; 40 u32 odm_raid_algorithm;
47}; 41};
48 42
49#endif /* else defined(CONFIG_PNFS) */ 43#endif /* ! defined(__PNFS_OSD_XDR_H__) */
50 44
51#endif /* __EXOFS_PNFS_H__ */ 45#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 197c7db583c7..e9e175949a63 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -6,7 +6,7 @@
6 * and for mapping back from file handles to dentries. 6 * and for mapping back from file handles to dentries.
7 * 7 *
8 * For details on why we do all the strange and hairy things in here 8 * For details on why we do all the strange and hairy things in here
9 * take a look at Documentation/filesystems/Exporting. 9 * take a look at Documentation/filesystems/nfs/Exporting.
10 */ 10 */
11#include <linux/exportfs.h> 11#include <linux/exportfs.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a63d44256a70..a99e54318c3d 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode)
339 * Extended attribut handlers 339 * Extended attribut handlers
340 */ 340 */
341static size_t 341static size_t
342ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, 342ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size,
343 const char *name, size_t name_len) 343 const char *name, size_t name_len, int type)
344{ 344{
345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 345 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
346 346
347 if (!test_opt(inode->i_sb, POSIX_ACL)) 347 if (!test_opt(dentry->d_sb, POSIX_ACL))
348 return 0; 348 return 0;
349 if (list && size <= list_size) 349 if (list && size <= list_size)
350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 350 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
352} 352}
353 353
354static size_t 354static size_t
355ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, 355ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size,
356 const char *name, size_t name_len) 356 const char *name, size_t name_len, int type)
357{ 357{
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 359
360 if (!test_opt(inode->i_sb, POSIX_ACL)) 360 if (!test_opt(dentry->d_sb, POSIX_ACL))
361 return 0; 361 return 0;
362 if (list && size <= list_size) 362 if (list && size <= list_size)
363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 363 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
365} 365}
366 366
367static int 367static int
368ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 368ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
369 size_t size, int type)
369{ 370{
370 struct posix_acl *acl; 371 struct posix_acl *acl;
371 int error; 372 int error;
372 373
373 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (strcmp(name, "") != 0)
375 return -EINVAL;
376 if (!test_opt(dentry->d_sb, POSIX_ACL))
374 return -EOPNOTSUPP; 377 return -EOPNOTSUPP;
375 378
376 acl = ext2_get_acl(inode, type); 379 acl = ext2_get_acl(dentry->d_inode, type);
377 if (IS_ERR(acl)) 380 if (IS_ERR(acl))
378 return PTR_ERR(acl); 381 return PTR_ERR(acl);
379 if (acl == NULL) 382 if (acl == NULL)
@@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
385} 388}
386 389
387static int 390static int
388ext2_xattr_get_acl_access(struct inode *inode, const char *name, 391ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
389 void *buffer, size_t size) 392 size_t size, int flags, int type)
390{
391 if (strcmp(name, "") != 0)
392 return -EINVAL;
393 return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int
397ext2_xattr_get_acl_default(struct inode *inode, const char *name,
398 void *buffer, size_t size)
399{
400 if (strcmp(name, "") != 0)
401 return -EINVAL;
402 return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
403}
404
405static int
406ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
407 size_t size)
408{ 393{
409 struct posix_acl *acl; 394 struct posix_acl *acl;
410 int error; 395 int error;
411 396
412 if (!test_opt(inode->i_sb, POSIX_ACL)) 397 if (strcmp(name, "") != 0)
398 return -EINVAL;
399 if (!test_opt(dentry->d_sb, POSIX_ACL))
413 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
414 if (!is_owner_or_cap(inode)) 401 if (!is_owner_or_cap(dentry->d_inode))
415 return -EPERM; 402 return -EPERM;
416 403
417 if (value) { 404 if (value) {
@@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
426 } else 413 } else
427 acl = NULL; 414 acl = NULL;
428 415
429 error = ext2_set_acl(inode, type, acl); 416 error = ext2_set_acl(dentry->d_inode, type, acl);
430 417
431release_and_out: 418release_and_out:
432 posix_acl_release(acl); 419 posix_acl_release(acl);
433 return error; 420 return error;
434} 421}
435 422
436static int
437ext2_xattr_set_acl_access(struct inode *inode, const char *name,
438 const void *value, size_t size, int flags)
439{
440 if (strcmp(name, "") != 0)
441 return -EINVAL;
442 return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
443}
444
445static int
446ext2_xattr_set_acl_default(struct inode *inode, const char *name,
447 const void *value, size_t size, int flags)
448{
449 if (strcmp(name, "") != 0)
450 return -EINVAL;
451 return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
452}
453
454struct xattr_handler ext2_xattr_acl_access_handler = { 423struct xattr_handler ext2_xattr_acl_access_handler = {
455 .prefix = POSIX_ACL_XATTR_ACCESS, 424 .prefix = POSIX_ACL_XATTR_ACCESS,
425 .flags = ACL_TYPE_ACCESS,
456 .list = ext2_xattr_list_acl_access, 426 .list = ext2_xattr_list_acl_access,
457 .get = ext2_xattr_get_acl_access, 427 .get = ext2_xattr_get_acl,
458 .set = ext2_xattr_set_acl_access, 428 .set = ext2_xattr_set_acl,
459}; 429};
460 430
461struct xattr_handler ext2_xattr_acl_default_handler = { 431struct xattr_handler ext2_xattr_acl_default_handler = {
462 .prefix = POSIX_ACL_XATTR_DEFAULT, 432 .prefix = POSIX_ACL_XATTR_DEFAULT,
433 .flags = ACL_TYPE_DEFAULT,
463 .list = ext2_xattr_list_acl_default, 434 .list = ext2_xattr_list_acl_default,
464 .get = ext2_xattr_get_acl_default, 435 .get = ext2_xattr_get_acl,
465 .set = ext2_xattr_set_acl_default, 436 .set = ext2_xattr_set_acl,
466}; 437};
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index fc2bd05d3559..7516957273ed 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -721,5 +721,5 @@ const struct file_operations ext2_dir_operations = {
721#ifdef CONFIG_COMPAT 721#ifdef CONFIG_COMPAT
722 .compat_ioctl = ext2_compat_ioctl, 722 .compat_ioctl = ext2_compat_ioctl,
723#endif 723#endif
724 .fsync = simple_fsync, 724 .fsync = ext2_fsync,
725}; 725};
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index da318b0fa637..061914add3cf 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -155,6 +155,7 @@ extern void ext2_write_super (struct super_block *);
155extern const struct file_operations ext2_dir_operations; 155extern const struct file_operations ext2_dir_operations;
156 156
157/* file.c */ 157/* file.c */
158extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync);
158extern const struct inode_operations ext2_file_inode_operations; 159extern const struct inode_operations ext2_file_inode_operations;
159extern const struct file_operations ext2_file_operations; 160extern const struct file_operations ext2_file_operations;
160extern const struct file_operations ext2_xip_file_operations; 161extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index a2f3afd1a1c1..586e3589d4c2 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/pagemap.h>
22#include "ext2.h" 23#include "ext2.h"
23#include "xattr.h" 24#include "xattr.h"
24#include "acl.h" 25#include "acl.h"
@@ -38,6 +39,22 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
38 return 0; 39 return 0;
39} 40}
40 41
42int ext2_fsync(struct file *file, struct dentry *dentry, int datasync)
43{
44 int ret;
45 struct super_block *sb = dentry->d_inode->i_sb;
46 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
47
48 ret = simple_fsync(file, dentry, datasync);
49 if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
50 /* We don't really know where the IO error happened... */
51 ext2_error(sb, __func__,
52 "detected IO error when writing metadata buffers");
53 ret = -EIO;
54 }
55 return ret;
56}
57
41/* 58/*
42 * We have mostly NULL's here: the current defaults are ok for 59 * We have mostly NULL's here: the current defaults are ok for
43 * the ext2 filesystem. 60 * the ext2 filesystem.
@@ -55,7 +72,7 @@ const struct file_operations ext2_file_operations = {
55 .mmap = generic_file_mmap, 72 .mmap = generic_file_mmap,
56 .open = generic_file_open, 73 .open = generic_file_open,
57 .release = ext2_release_file, 74 .release = ext2_release_file,
58 .fsync = simple_fsync, 75 .fsync = ext2_fsync,
59 .splice_read = generic_file_splice_read, 76 .splice_read = generic_file_splice_read,
60 .splice_write = generic_file_splice_write, 77 .splice_write = generic_file_splice_write,
61}; 78};
@@ -72,7 +89,7 @@ const struct file_operations ext2_xip_file_operations = {
72 .mmap = xip_file_mmap, 89 .mmap = xip_file_mmap,
73 .open = generic_file_open, 90 .open = generic_file_open,
74 .release = ext2_release_file, 91 .release = ext2_release_file,
75 .fsync = simple_fsync, 92 .fsync = ext2_fsync,
76}; 93};
77#endif 94#endif
78 95
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1388802b7803..f9cb54a585ce 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1105,9 +1105,30 @@ failed_sbi:
1105 return ret; 1105 return ret;
1106} 1106}
1107 1107
1108static void ext2_clear_super_error(struct super_block *sb)
1109{
1110 struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1111
1112 if (buffer_write_io_error(sbh)) {
1113 /*
1114 * Oh, dear. A previous attempt to write the
1115 * superblock failed. This could happen because the
1116 * USB device was yanked out. Or it could happen to
1117 * be a transient write error and maybe the block will
1118 * be remapped. Nothing we can do but to retry the
1119 * write and hope for the best.
1120 */
1121 printk(KERN_ERR "EXT2-fs: %s previous I/O error to "
1122 "superblock detected", sb->s_id);
1123 clear_buffer_write_io_error(sbh);
1124 set_buffer_uptodate(sbh);
1125 }
1126}
1127
1108static void ext2_commit_super (struct super_block * sb, 1128static void ext2_commit_super (struct super_block * sb,
1109 struct ext2_super_block * es) 1129 struct ext2_super_block * es)
1110{ 1130{
1131 ext2_clear_super_error(sb);
1111 es->s_wtime = cpu_to_le32(get_seconds()); 1132 es->s_wtime = cpu_to_le32(get_seconds());
1112 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 1133 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1113 sb->s_dirt = 0; 1134 sb->s_dirt = 0;
@@ -1115,6 +1136,7 @@ static void ext2_commit_super (struct super_block * sb,
1115 1136
1116static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) 1137static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1117{ 1138{
1139 ext2_clear_super_error(sb);
1118 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); 1140 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1119 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); 1141 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1120 es->s_wtime = cpu_to_le32(get_seconds()); 1142 es->s_wtime = cpu_to_le32(get_seconds());
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7913531ec6d5..904f00642f84 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
60#include <linux/mbcache.h> 60#include <linux/mbcache.h>
61#include <linux/quotaops.h> 61#include <linux/quotaops.h>
62#include <linux/rwsem.h> 62#include <linux/rwsem.h>
63#include <linux/security.h>
63#include "ext2.h" 64#include "ext2.h"
64#include "xattr.h" 65#include "xattr.h"
65#include "acl.h" 66#include "acl.h"
@@ -249,8 +250,9 @@ cleanup:
249 * used / required on success. 250 * used / required on success.
250 */ 251 */
251static int 252static int
252ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 253ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
253{ 254{
255 struct inode *inode = dentry->d_inode;
254 struct buffer_head *bh = NULL; 256 struct buffer_head *bh = NULL;
255 struct ext2_xattr_entry *entry; 257 struct ext2_xattr_entry *entry;
256 char *end; 258 char *end;
@@ -300,9 +302,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
300 ext2_xattr_handler(entry->e_name_index); 302 ext2_xattr_handler(entry->e_name_index);
301 303
302 if (handler) { 304 if (handler) {
303 size_t size = handler->list(inode, buffer, rest, 305 size_t size = handler->list(dentry, buffer, rest,
304 entry->e_name, 306 entry->e_name,
305 entry->e_name_len); 307 entry->e_name_len,
308 handler->flags);
306 if (buffer) { 309 if (buffer) {
307 if (size > rest) { 310 if (size > rest) {
308 error = -ERANGE; 311 error = -ERANGE;
@@ -330,7 +333,7 @@ cleanup:
330ssize_t 333ssize_t
331ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) 334ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
332{ 335{
333 return ext2_xattr_list(dentry->d_inode, buffer, size); 336 return ext2_xattr_list(dentry, buffer, size);
334} 337}
335 338
336/* 339/*
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 70c0dbdcdcb7..c8155845ac05 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -11,8 +11,8 @@
11#include "xattr.h" 11#include "xattr.h"
12 12
13static size_t 13static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len, int type)
16{ 16{
17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
@@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
26} 26}
27 27
28static int 28static int
29ext2_xattr_security_get(struct inode *inode, const char *name, 29ext2_xattr_security_get(struct dentry *dentry, const char *name,
30 void *buffer, size_t size) 30 void *buffer, size_t size, int type)
31{ 31{
32 if (strcmp(name, "") == 0) 32 if (strcmp(name, "") == 0)
33 return -EINVAL; 33 return -EINVAL;
34 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, 34 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
35 buffer, size); 35 buffer, size);
36} 36}
37 37
38static int 38static int
39ext2_xattr_security_set(struct inode *inode, const char *name, 39ext2_xattr_security_set(struct dentry *dentry, const char *name,
40 const void *value, size_t size, int flags) 40 const void *value, size_t size, int flags, int type)
41{ 41{
42 if (strcmp(name, "") == 0) 42 if (strcmp(name, "") == 0)
43 return -EINVAL; 43 return -EINVAL;
44 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, 44 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
45 value, size, flags); 45 value, size, flags);
46} 46}
47 47
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index e8219f8eae9f..2a26d71f4771 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -13,8 +13,8 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
@@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext2_xattr_trusted_get(struct inode *inode, const char *name, 34ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, 39 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
40 buffer, size); 40 buffer, size);
41} 41}
42 42
43static int 43static int
44ext2_xattr_trusted_set(struct inode *inode, const char *name, 44ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 45 const void *value, size_t size, int flags, int type)
46{ 46{
47 if (strcmp(name, "") == 0) 47 if (strcmp(name, "") == 0)
48 return -EINVAL; 48 return -EINVAL;
49 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, 49 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
50 value, size, flags); 50 value, size, flags);
51} 51}
52 52
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 92495d28c62f..3f6caf3684b4 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -12,13 +12,13 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(dentry->d_sb, XATTR_USER))
22 return 0; 22 return 0;
23 23
24 if (list && total_len <= list_size) { 24 if (list && total_len <= list_size) {
@@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
30} 30}
31 31
32static int 32static int
33ext2_xattr_user_get(struct inode *inode, const char *name, 33ext2_xattr_user_get(struct dentry *dentry, const char *name,
34 void *buffer, size_t size) 34 void *buffer, size_t size, int type)
35{ 35{
36 if (strcmp(name, "") == 0) 36 if (strcmp(name, "") == 0)
37 return -EINVAL; 37 return -EINVAL;
38 if (!test_opt(inode->i_sb, XATTR_USER)) 38 if (!test_opt(dentry->d_sb, XATTR_USER))
39 return -EOPNOTSUPP; 39 return -EOPNOTSUPP;
40 return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); 40 return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
41 name, buffer, size);
41} 42}
42 43
43static int 44static int
44ext2_xattr_user_set(struct inode *inode, const char *name, 45ext2_xattr_user_set(struct dentry *dentry, const char *name,
45 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
46{ 47{
47 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
48 return -EINVAL; 49 return -EINVAL;
49 if (!test_opt(inode->i_sb, XATTR_USER)) 50 if (!test_opt(dentry->d_sb, XATTR_USER))
50 return -EOPNOTSUPP; 51 return -EOPNOTSUPP;
51 52
52 return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, 53 return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext2_xattr_user_handler = { 57struct xattr_handler ext2_xattr_user_handler = {
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c9b0df376b5f..82ba34158661 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -366,12 +366,12 @@ out:
366 * Extended attribute handlers 366 * Extended attribute handlers
367 */ 367 */
368static size_t 368static size_t
369ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 369ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
370 const char *name, size_t name_len) 370 const char *name, size_t name_len, int type)
371{ 371{
372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 372 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
373 373
374 if (!test_opt(inode->i_sb, POSIX_ACL)) 374 if (!test_opt(dentry->d_sb, POSIX_ACL))
375 return 0; 375 return 0;
376 if (list && size <= list_len) 376 if (list && size <= list_len)
377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 377 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
379} 379}
380 380
381static size_t 381static size_t
382ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 382ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
383 const char *name, size_t name_len) 383 const char *name, size_t name_len, int type)
384{ 384{
385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 385 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
386 386
387 if (!test_opt(inode->i_sb, POSIX_ACL)) 387 if (!test_opt(dentry->d_sb, POSIX_ACL))
388 return 0; 388 return 0;
389 if (list && size <= list_len) 389 if (list && size <= list_len)
390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 390 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
392} 392}
393 393
394static int 394static int
395ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 395ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
396 size_t size, int type)
396{ 397{
397 struct posix_acl *acl; 398 struct posix_acl *acl;
398 int error; 399 int error;
399 400
400 if (!test_opt(inode->i_sb, POSIX_ACL)) 401 if (strcmp(name, "") != 0)
402 return -EINVAL;
403 if (!test_opt(dentry->d_sb, POSIX_ACL))
401 return -EOPNOTSUPP; 404 return -EOPNOTSUPP;
402 405
403 acl = ext3_get_acl(inode, type); 406 acl = ext3_get_acl(dentry->d_inode, type);
404 if (IS_ERR(acl)) 407 if (IS_ERR(acl))
405 return PTR_ERR(acl); 408 return PTR_ERR(acl);
406 if (acl == NULL) 409 if (acl == NULL)
@@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
412} 415}
413 416
414static int 417static int
415ext3_xattr_get_acl_access(struct inode *inode, const char *name, 418ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
416 void *buffer, size_t size) 419 size_t size, int flags, int type)
417{
418 if (strcmp(name, "") != 0)
419 return -EINVAL;
420 return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
421}
422
423static int
424ext3_xattr_get_acl_default(struct inode *inode, const char *name,
425 void *buffer, size_t size)
426{
427 if (strcmp(name, "") != 0)
428 return -EINVAL;
429 return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
430}
431
432static int
433ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
434 size_t size)
435{ 420{
421 struct inode *inode = dentry->d_inode;
436 handle_t *handle; 422 handle_t *handle;
437 struct posix_acl *acl; 423 struct posix_acl *acl;
438 int error, retries = 0; 424 int error, retries = 0;
439 425
426 if (strcmp(name, "") != 0)
427 return -EINVAL;
440 if (!test_opt(inode->i_sb, POSIX_ACL)) 428 if (!test_opt(inode->i_sb, POSIX_ACL))
441 return -EOPNOTSUPP; 429 return -EOPNOTSUPP;
442 if (!is_owner_or_cap(inode)) 430 if (!is_owner_or_cap(inode))
@@ -468,34 +456,18 @@ release_and_out:
468 return error; 456 return error;
469} 457}
470 458
471static int
472ext3_xattr_set_acl_access(struct inode *inode, const char *name,
473 const void *value, size_t size, int flags)
474{
475 if (strcmp(name, "") != 0)
476 return -EINVAL;
477 return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
478}
479
480static int
481ext3_xattr_set_acl_default(struct inode *inode, const char *name,
482 const void *value, size_t size, int flags)
483{
484 if (strcmp(name, "") != 0)
485 return -EINVAL;
486 return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
487}
488
489struct xattr_handler ext3_xattr_acl_access_handler = { 459struct xattr_handler ext3_xattr_acl_access_handler = {
490 .prefix = POSIX_ACL_XATTR_ACCESS, 460 .prefix = POSIX_ACL_XATTR_ACCESS,
461 .flags = ACL_TYPE_ACCESS,
491 .list = ext3_xattr_list_acl_access, 462 .list = ext3_xattr_list_acl_access,
492 .get = ext3_xattr_get_acl_access, 463 .get = ext3_xattr_get_acl,
493 .set = ext3_xattr_set_acl_access, 464 .set = ext3_xattr_set_acl,
494}; 465};
495 466
496struct xattr_handler ext3_xattr_acl_default_handler = { 467struct xattr_handler ext3_xattr_acl_default_handler = {
497 .prefix = POSIX_ACL_XATTR_DEFAULT, 468 .prefix = POSIX_ACL_XATTR_DEFAULT,
469 .flags = ACL_TYPE_DEFAULT,
498 .list = ext3_xattr_list_acl_default, 470 .list = ext3_xattr_list_acl_default,
499 .get = ext3_xattr_get_acl_default, 471 .get = ext3_xattr_get_acl,
500 .set = ext3_xattr_set_acl_default, 472 .set = ext3_xattr_set_acl,
501}; 473};
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad14227f509e..455e6e6e5cb9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
970 if (max_blocks > DIO_MAX_BLOCKS) 970 if (max_blocks > DIO_MAX_BLOCKS)
971 max_blocks = DIO_MAX_BLOCKS; 971 max_blocks = DIO_MAX_BLOCKS;
972 handle = ext3_journal_start(inode, DIO_CREDITS + 972 handle = ext3_journal_start(inode, DIO_CREDITS +
973 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb)); 973 EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
974 if (IS_ERR(handle)) { 974 if (IS_ERR(handle)) {
975 ret = PTR_ERR(handle); 975 ret = PTR_ERR(handle);
976 goto out; 976 goto out;
@@ -3146,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3146 3146
3147 /* (user+group)*(old+new) structure, inode write (sb, 3147 /* (user+group)*(old+new) structure, inode write (sb,
3148 * inode block, ? - but truncate inode update has it) */ 3148 * inode block, ? - but truncate inode update has it) */
3149 handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+ 3149 handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
3150 EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 3150 EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
3151 if (IS_ERR(handle)) { 3151 if (IS_ERR(handle)) {
3152 error = PTR_ERR(handle); 3152 error = PTR_ERR(handle);
3153 goto err_out; 3153 goto err_out;
@@ -3239,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3239#ifdef CONFIG_QUOTA 3239#ifdef CONFIG_QUOTA
3240 /* We know that structure was already allocated during vfs_dq_init so 3240 /* We know that structure was already allocated during vfs_dq_init so
3241 * we will be updating only the data blocks + inodes */ 3241 * we will be updating only the data blocks + inodes */
3242 ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); 3242 ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
3243#endif 3243#endif
3244 3244
3245 return ret; 3245 return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400c9b77..7b0e44f7d66f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1699retry: 1699retry:
1700 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1700 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1701 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1701 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1702 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1702 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1703 if (IS_ERR(handle)) 1703 if (IS_ERR(handle))
1704 return PTR_ERR(handle); 1704 return PTR_ERR(handle);
1705 1705
@@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1733retry: 1733retry:
1734 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1734 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1735 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1735 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1736 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1736 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1737 if (IS_ERR(handle)) 1737 if (IS_ERR(handle))
1738 return PTR_ERR(handle); 1738 return PTR_ERR(handle);
1739 1739
@@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1769retry: 1769retry:
1770 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 1770 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1771 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1771 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1772 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 1772 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1773 if (IS_ERR(handle)) 1773 if (IS_ERR(handle))
1774 return PTR_ERR(handle); 1774 return PTR_ERR(handle);
1775 1775
@@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1920 struct ext3_iloc iloc; 1920 struct ext3_iloc iloc;
1921 int err = 0, rc; 1921 int err = 0, rc;
1922 1922
1923 lock_super(sb); 1923 mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
1924 if (!list_empty(&EXT3_I(inode)->i_orphan)) 1924 if (!list_empty(&EXT3_I(inode)->i_orphan))
1925 goto out_unlock; 1925 goto out_unlock;
1926 1926
@@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1929 1929
1930 /* @@@ FIXME: Observation from aviro: 1930 /* @@@ FIXME: Observation from aviro:
1931 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block 1931 * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
1932 * here (on lock_super()), so race with ext3_link() which might bump 1932 * here (on s_orphan_lock), so race with ext3_link() which might bump
1933 * ->i_nlink. For, say it, character device. Not a regular file, 1933 * ->i_nlink. For, say it, character device. Not a regular file,
1934 * not a directory, not a symlink and ->i_nlink > 0. 1934 * not a directory, not a symlink and ->i_nlink > 0.
1935 *
1936 * tytso, 4/25/2009: I'm not sure how that could happen;
1937 * shouldn't the fs core protect us from these sort of
1938 * unlink()/link() races?
1935 */ 1939 */
1936 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1940 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1937 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 1941 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
1968 jbd_debug(4, "orphan inode %lu will point to %d\n", 1972 jbd_debug(4, "orphan inode %lu will point to %d\n",
1969 inode->i_ino, NEXT_ORPHAN(inode)); 1973 inode->i_ino, NEXT_ORPHAN(inode));
1970out_unlock: 1974out_unlock:
1971 unlock_super(sb); 1975 mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
1972 ext3_std_error(inode->i_sb, err); 1976 ext3_std_error(inode->i_sb, err);
1973 return err; 1977 return err;
1974} 1978}
@@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
1986 struct ext3_iloc iloc; 1990 struct ext3_iloc iloc;
1987 int err = 0; 1991 int err = 0;
1988 1992
1989 lock_super(inode->i_sb); 1993 mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
1990 if (list_empty(&ei->i_orphan)) { 1994 if (list_empty(&ei->i_orphan))
1991 unlock_super(inode->i_sb); 1995 goto out;
1992 return 0;
1993 }
1994 1996
1995 ino_next = NEXT_ORPHAN(inode); 1997 ino_next = NEXT_ORPHAN(inode);
1996 prev = ei->i_orphan.prev; 1998 prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
2040out_err: 2042out_err:
2041 ext3_std_error(inode->i_sb, err); 2043 ext3_std_error(inode->i_sb, err);
2042out: 2044out:
2043 unlock_super(inode->i_sb); 2045 mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
2044 return err; 2046 return err;
2045 2047
2046out_brelse: 2048out_brelse:
@@ -2175,7 +2177,7 @@ static int ext3_symlink (struct inode * dir,
2175retry: 2177retry:
2176 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2178 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2177 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2179 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2178 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); 2180 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2179 if (IS_ERR(handle)) 2181 if (IS_ERR(handle))
2180 return PTR_ERR(handle); 2182 return PTR_ERR(handle);
2181 2183
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5f83b6179178..54351ac7cef9 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb,
209 if (IS_ERR(handle)) 209 if (IS_ERR(handle))
210 return PTR_ERR(handle); 210 return PTR_ERR(handle);
211 211
212 lock_super(sb); 212 mutex_lock(&sbi->s_resize_lock);
213 if (input->group != sbi->s_groups_count) { 213 if (input->group != sbi->s_groups_count) {
214 err = -EBUSY; 214 err = -EBUSY;
215 goto exit_journal; 215 goto exit_journal;
@@ -324,7 +324,7 @@ exit_bh:
324 brelse(bh); 324 brelse(bh);
325 325
326exit_journal: 326exit_journal:
327 unlock_super(sb); 327 mutex_unlock(&sbi->s_resize_lock);
328 if ((err2 = ext3_journal_stop(handle)) && !err) 328 if ((err2 = ext3_journal_stop(handle)) && !err)
329 err = err2; 329 err = err2;
330 330
@@ -662,11 +662,12 @@ exit_free:
662 * important part is that the new block and inode counts are in the backup 662 * important part is that the new block and inode counts are in the backup
663 * superblocks, and the location of the new group metadata in the GDT backups. 663 * superblocks, and the location of the new group metadata in the GDT backups.
664 * 664 *
665 * We do not need lock_super() for this, because these blocks are not 665 * We do not need take the s_resize_lock for this, because these
666 * otherwise touched by the filesystem code when it is mounted. We don't 666 * blocks are not otherwise touched by the filesystem code when it is
667 * need to worry about last changing from sbi->s_groups_count, because the 667 * mounted. We don't need to worry about last changing from
668 * worst that can happen is that we do not copy the full number of backups 668 * sbi->s_groups_count, because the worst that can happen is that we
669 * at this time. The resize which changed s_groups_count will backup again. 669 * do not copy the full number of backups at this time. The resize
670 * which changed s_groups_count will backup again.
670 */ 671 */
671static void update_backups(struct super_block *sb, 672static void update_backups(struct super_block *sb,
672 int blk_off, char *data, int size) 673 int blk_off, char *data, int size)
@@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
825 goto exit_put; 826 goto exit_put;
826 } 827 }
827 828
828 lock_super(sb); 829 mutex_lock(&sbi->s_resize_lock);
829 if (input->group != sbi->s_groups_count) { 830 if (input->group != sbi->s_groups_count) {
830 ext3_warning(sb, __func__, 831 ext3_warning(sb, __func__,
831 "multiple resizers run on filesystem!"); 832 "multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
856 /* 857 /*
857 * OK, now we've set up the new group. Time to make it active. 858 * OK, now we've set up the new group. Time to make it active.
858 * 859 *
859 * Current kernels don't lock all allocations via lock_super(), 860 * We do not lock all allocations via s_resize_lock
860 * so we have to be safe wrt. concurrent accesses the group 861 * so we have to be safe wrt. concurrent accesses the group
861 * data. So we need to be careful to set all of the relevant 862 * data. So we need to be careful to set all of the relevant
862 * group descriptor data etc. *before* we enable the group. 863 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
900 * 901 *
901 * The precise rules we use are: 902 * The precise rules we use are:
902 * 903 *
903 * * Writers of s_groups_count *must* hold lock_super 904 * * Writers of s_groups_count *must* hold s_resize_lock
904 * AND 905 * AND
905 * * Writers must perform a smp_wmb() after updating all dependent 906 * * Writers must perform a smp_wmb() after updating all dependent
906 * data and before modifying the groups count 907 * data and before modifying the groups count
907 * 908 *
908 * * Readers must hold lock_super() over the access 909 * * Readers must hold s_resize_lock over the access
909 * OR 910 * OR
910 * * Readers must perform an smp_rmb() after reading the groups count 911 * * Readers must perform an smp_rmb() after reading the groups count
911 * and before reading any dependent data. 912 * and before reading any dependent data.
@@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
936 ext3_journal_dirty_metadata(handle, sbi->s_sbh); 937 ext3_journal_dirty_metadata(handle, sbi->s_sbh);
937 938
938exit_journal: 939exit_journal:
939 unlock_super(sb); 940 mutex_unlock(&sbi->s_resize_lock);
940 if ((err2 = ext3_journal_stop(handle)) && !err) 941 if ((err2 = ext3_journal_stop(handle)) && !err)
941 err = err2; 942 err = err2;
942 if (!err) { 943 if (!err) {
@@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
973 974
974 /* We don't need to worry about locking wrt other resizers just 975 /* We don't need to worry about locking wrt other resizers just
975 * yet: we're going to revalidate es->s_blocks_count after 976 * yet: we're going to revalidate es->s_blocks_count after
976 * taking lock_super() below. */ 977 * taking the s_resize_lock below. */
977 o_blocks_count = le32_to_cpu(es->s_blocks_count); 978 o_blocks_count = le32_to_cpu(es->s_blocks_count);
978 o_groups_count = EXT3_SB(sb)->s_groups_count; 979 o_groups_count = EXT3_SB(sb)->s_groups_count;
979 980
@@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1045 goto exit_put; 1046 goto exit_put;
1046 } 1047 }
1047 1048
1048 lock_super(sb); 1049 mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1049 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 1050 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1050 ext3_warning(sb, __func__, 1051 ext3_warning(sb, __func__,
1051 "multiple resizers run on filesystem!"); 1052 "multiple resizers run on filesystem!");
1052 unlock_super(sb); 1053 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1053 ext3_journal_stop(handle); 1054 ext3_journal_stop(handle);
1054 err = -EBUSY; 1055 err = -EBUSY;
1055 goto exit_put; 1056 goto exit_put;
@@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1059 EXT3_SB(sb)->s_sbh))) { 1060 EXT3_SB(sb)->s_sbh))) {
1060 ext3_warning(sb, __func__, 1061 ext3_warning(sb, __func__,
1061 "error %d on journal write access", err); 1062 "error %d on journal write access", err);
1062 unlock_super(sb); 1063 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1063 ext3_journal_stop(handle); 1064 ext3_journal_stop(handle);
1064 goto exit_put; 1065 goto exit_put;
1065 } 1066 }
1066 es->s_blocks_count = cpu_to_le32(o_blocks_count + add); 1067 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1067 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); 1068 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1068 unlock_super(sb); 1069 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1069 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, 1070 ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
1070 o_blocks_count + add); 1071 o_blocks_count + add);
1071 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1072 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7ad1e8c30bd0..afa2b569da10 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1928,6 +1928,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1928 sb->dq_op = &ext3_quota_operations; 1928 sb->dq_op = &ext3_quota_operations;
1929#endif 1929#endif
1930 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1930 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1931 mutex_init(&sbi->s_orphan_lock);
1932 mutex_init(&sbi->s_resize_lock);
1931 1933
1932 sb->s_root = NULL; 1934 sb->s_root = NULL;
1933 1935
@@ -2014,14 +2016,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2014 } 2016 }
2015 2017
2016 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2018 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
2017 /* 2019
2018 * akpm: core read_super() calls in here with the superblock locked.
2019 * That deadlocks, because orphan cleanup needs to lock the superblock
2020 * in numerous places. Here we just pop the lock - it's relatively
2021 * harmless, because we are now ready to accept write_super() requests,
2022 * and aviro says that's the only reason for hanging onto the
2023 * superblock lock.
2024 */
2025 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2020 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2026 ext3_orphan_cleanup(sb, es); 2021 ext3_orphan_cleanup(sb, es);
2027 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2022 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
@@ -2403,13 +2398,11 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2403 if (journal_flush(journal) < 0) 2398 if (journal_flush(journal) < 0)
2404 goto out; 2399 goto out;
2405 2400
2406 lock_super(sb);
2407 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2401 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2408 sb->s_flags & MS_RDONLY) { 2402 sb->s_flags & MS_RDONLY) {
2409 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2403 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2410 ext3_commit_super(sb, es, 1); 2404 ext3_commit_super(sb, es, 1);
2411 } 2405 }
2412 unlock_super(sb);
2413 2406
2414out: 2407out:
2415 journal_unlock_updates(journal); 2408 journal_unlock_updates(journal);
@@ -2601,13 +2594,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2601 (sbi->s_mount_state & EXT3_VALID_FS)) 2594 (sbi->s_mount_state & EXT3_VALID_FS))
2602 es->s_state = cpu_to_le16(sbi->s_mount_state); 2595 es->s_state = cpu_to_le16(sbi->s_mount_state);
2603 2596
2604 /*
2605 * We have to unlock super so that we can wait for
2606 * transactions.
2607 */
2608 unlock_super(sb);
2609 ext3_mark_recovery_complete(sb, es); 2597 ext3_mark_recovery_complete(sb, es);
2610 lock_super(sb);
2611 } else { 2598 } else {
2612 __le32 ret; 2599 __le32 ret;
2613 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2600 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 387d92d00b97..66895ccf76c7 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
99 struct mb_cache_entry **); 99 struct mb_cache_entry **);
100static void ext3_xattr_rehash(struct ext3_xattr_header *, 100static void ext3_xattr_rehash(struct ext3_xattr_header *,
101 struct ext3_xattr_entry *); 101 struct ext3_xattr_entry *);
102static int ext3_xattr_list(struct inode *inode, char *buffer, 102static int ext3_xattr_list(struct dentry *dentry, char *buffer,
103 size_t buffer_size); 103 size_t buffer_size);
104 104
105static struct mb_cache *ext3_xattr_cache; 105static struct mb_cache *ext3_xattr_cache;
@@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index)
147ssize_t 147ssize_t
148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) 148ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
149{ 149{
150 return ext3_xattr_list(dentry->d_inode, buffer, size); 150 return ext3_xattr_list(dentry, buffer, size);
151} 151}
152 152
153static int 153static int
@@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
332} 332}
333 333
334static int 334static int
335ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, 335ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
336 char *buffer, size_t buffer_size) 336 char *buffer, size_t buffer_size)
337{ 337{
338 size_t rest = buffer_size; 338 size_t rest = buffer_size;
@@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
342 ext3_xattr_handler(entry->e_name_index); 342 ext3_xattr_handler(entry->e_name_index);
343 343
344 if (handler) { 344 if (handler) {
345 size_t size = handler->list(inode, buffer, rest, 345 size_t size = handler->list(dentry, buffer, rest,
346 entry->e_name, 346 entry->e_name,
347 entry->e_name_len); 347 entry->e_name_len,
348 handler->flags);
348 if (buffer) { 349 if (buffer) {
349 if (size > rest) 350 if (size > rest)
350 return -ERANGE; 351 return -ERANGE;
@@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
357} 358}
358 359
359static int 360static int
360ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 361ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
361{ 362{
363 struct inode *inode = dentry->d_inode;
362 struct buffer_head *bh = NULL; 364 struct buffer_head *bh = NULL;
363 int error; 365 int error;
364 366
@@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
383 goto cleanup; 385 goto cleanup;
384 } 386 }
385 ext3_xattr_cache_insert(bh); 387 ext3_xattr_cache_insert(bh);
386 error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 388 error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
387 389
388cleanup: 390cleanup:
389 brelse(bh); 391 brelse(bh);
@@ -392,8 +394,9 @@ cleanup:
392} 394}
393 395
394static int 396static int
395ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 397ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
396{ 398{
399 struct inode *inode = dentry->d_inode;
397 struct ext3_xattr_ibody_header *header; 400 struct ext3_xattr_ibody_header *header;
398 struct ext3_inode *raw_inode; 401 struct ext3_inode *raw_inode;
399 struct ext3_iloc iloc; 402 struct ext3_iloc iloc;
@@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
411 error = ext3_xattr_check_names(IFIRST(header), end); 414 error = ext3_xattr_check_names(IFIRST(header), end);
412 if (error) 415 if (error)
413 goto cleanup; 416 goto cleanup;
414 error = ext3_xattr_list_entries(inode, IFIRST(header), 417 error = ext3_xattr_list_entries(dentry, IFIRST(header),
415 buffer, buffer_size); 418 buffer, buffer_size);
416 419
417cleanup: 420cleanup:
@@ -430,12 +433,12 @@ cleanup:
430 * used / required on success. 433 * used / required on success.
431 */ 434 */
432static int 435static int
433ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 436ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
434{ 437{
435 int i_error, b_error; 438 int i_error, b_error;
436 439
437 down_read(&EXT3_I(inode)->xattr_sem); 440 down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
438 i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size); 441 i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
439 if (i_error < 0) { 442 if (i_error < 0) {
440 b_error = 0; 443 b_error = 0;
441 } else { 444 } else {
@@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
443 buffer += i_error; 446 buffer += i_error;
444 buffer_size -= i_error; 447 buffer_size -= i_error;
445 } 448 }
446 b_error = ext3_xattr_block_list(inode, buffer, buffer_size); 449 b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
447 if (b_error < 0) 450 if (b_error < 0)
448 i_error = 0; 451 i_error = 0;
449 } 452 }
450 up_read(&EXT3_I(inode)->xattr_sem); 453 up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
451 return i_error + b_error; 454 return i_error + b_error;
452} 455}
453 456
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 37b81097bdf2..474348788dd9 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext3_xattr_security_get(struct inode *inode, const char *name, 31ext3_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name, 36 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext3_xattr_security_set(struct inode *inode, const char *name, 41ext3_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name, 46 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index c7c41a410c4b..e5562845ed96 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext3_xattr_trusted_get(struct inode *inode, const char *name, 35ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
36 void *buffer, size_t size) 36 void *buffer, size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, 40 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext3_xattr_trusted_set(struct inode *inode, const char *name, 45ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, 50 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
51 value, size, flags); 51 value, size, flags);
52} 52}
53 53
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 430fe63b31b3..3bcfe9ee0a68 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext3_xattr_user_get(struct inode *inode, const char *name, 34ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
35 void *buffer, size_t size) 35 size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); 41 return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext3_xattr_user_set(struct inode *inode, const char *name, 46ext3_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, 53 return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext3_xattr_user_handler = { 57struct xattr_handler ext3_xattr_user_handler = {
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
28 28
29config EXT4_USE_FOR_EXT23 29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
31 depends on EXT3_FS=n || EXT2_FS=n 32 depends on EXT3_FS=n || EXT2_FS=n
32 default y 33 default y
33 help 34 help
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b0..8a2a29d35a6f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
364 * Extended attribute handlers 364 * Extended attribute handlers
365 */ 365 */
366static size_t 366static size_t
367ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 367ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
368 const char *name, size_t name_len) 368 const char *name, size_t name_len, int type)
369{ 369{
370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 370 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
371 371
372 if (!test_opt(inode->i_sb, POSIX_ACL)) 372 if (!test_opt(dentry->d_sb, POSIX_ACL))
373 return 0; 373 return 0;
374 if (list && size <= list_len) 374 if (list && size <= list_len)
375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 375 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
377} 377}
378 378
379static size_t 379static size_t
380ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 380ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
381 const char *name, size_t name_len) 381 const char *name, size_t name_len, int type)
382{ 382{
383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 383 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
384 384
385 if (!test_opt(inode->i_sb, POSIX_ACL)) 385 if (!test_opt(dentry->d_sb, POSIX_ACL))
386 return 0; 386 return 0;
387 if (list && size <= list_len) 387 if (list && size <= list_len)
388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 388 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
390} 390}
391 391
392static int 392static int
393ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 393ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
394 size_t size, int type)
394{ 395{
395 struct posix_acl *acl; 396 struct posix_acl *acl;
396 int error; 397 int error;
397 398
398 if (!test_opt(inode->i_sb, POSIX_ACL)) 399 if (strcmp(name, "") != 0)
400 return -EINVAL;
401 if (!test_opt(dentry->d_sb, POSIX_ACL))
399 return -EOPNOTSUPP; 402 return -EOPNOTSUPP;
400 403
401 acl = ext4_get_acl(inode, type); 404 acl = ext4_get_acl(dentry->d_inode, type);
402 if (IS_ERR(acl)) 405 if (IS_ERR(acl))
403 return PTR_ERR(acl); 406 return PTR_ERR(acl);
404 if (acl == NULL) 407 if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
410} 413}
411 414
412static int 415static int
413ext4_xattr_get_acl_access(struct inode *inode, const char *name, 416ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
414 void *buffer, size_t size) 417 size_t size, int flags, int type)
415{
416 if (strcmp(name, "") != 0)
417 return -EINVAL;
418 return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
419}
420
421static int
422ext4_xattr_get_acl_default(struct inode *inode, const char *name,
423 void *buffer, size_t size)
424{
425 if (strcmp(name, "") != 0)
426 return -EINVAL;
427 return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
428}
429
430static int
431ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
432 size_t size)
433{ 418{
419 struct inode *inode = dentry->d_inode;
434 handle_t *handle; 420 handle_t *handle;
435 struct posix_acl *acl; 421 struct posix_acl *acl;
436 int error, retries = 0; 422 int error, retries = 0;
437 423
424 if (strcmp(name, "") != 0)
425 return -EINVAL;
438 if (!test_opt(inode->i_sb, POSIX_ACL)) 426 if (!test_opt(inode->i_sb, POSIX_ACL))
439 return -EOPNOTSUPP; 427 return -EOPNOTSUPP;
440 if (!is_owner_or_cap(inode)) 428 if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
466 return error; 454 return error;
467} 455}
468 456
469static int
470ext4_xattr_set_acl_access(struct inode *inode, const char *name,
471 const void *value, size_t size, int flags)
472{
473 if (strcmp(name, "") != 0)
474 return -EINVAL;
475 return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
476}
477
478static int
479ext4_xattr_set_acl_default(struct inode *inode, const char *name,
480 const void *value, size_t size, int flags)
481{
482 if (strcmp(name, "") != 0)
483 return -EINVAL;
484 return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
485}
486
487struct xattr_handler ext4_xattr_acl_access_handler = { 457struct xattr_handler ext4_xattr_acl_access_handler = {
488 .prefix = POSIX_ACL_XATTR_ACCESS, 458 .prefix = POSIX_ACL_XATTR_ACCESS,
459 .flags = ACL_TYPE_ACCESS,
489 .list = ext4_xattr_list_acl_access, 460 .list = ext4_xattr_list_acl_access,
490 .get = ext4_xattr_get_acl_access, 461 .get = ext4_xattr_get_acl,
491 .set = ext4_xattr_set_acl_access, 462 .set = ext4_xattr_set_acl,
492}; 463};
493 464
494struct xattr_handler ext4_xattr_acl_default_handler = { 465struct xattr_handler ext4_xattr_acl_default_handler = {
495 .prefix = POSIX_ACL_XATTR_DEFAULT, 466 .prefix = POSIX_ACL_XATTR_DEFAULT,
467 .flags = ACL_TYPE_DEFAULT,
496 .list = ext4_xattr_list_acl_default, 468 .list = ext4_xattr_list_acl_default,
497 .get = ext4_xattr_get_acl_default, 469 .get = ext4_xattr_get_acl,
498 .set = ext4_xattr_set_acl_default, 470 .set = ext4_xattr_set_acl,
499}; 471};
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab31e65d46d0..af7b62699ea9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -699,11 +699,17 @@ struct ext4_inode_info {
699 unsigned int i_reserved_meta_blocks; 699 unsigned int i_reserved_meta_blocks;
700 unsigned int i_allocated_meta_blocks; 700 unsigned int i_allocated_meta_blocks;
701 unsigned short i_delalloc_reserved_flag; 701 unsigned short i_delalloc_reserved_flag;
702 sector_t i_da_metadata_calc_last_lblock;
703 int i_da_metadata_calc_len;
702 704
703 /* on-disk additional length */ 705 /* on-disk additional length */
704 __u16 i_extra_isize; 706 __u16 i_extra_isize;
705 707
706 spinlock_t i_block_reservation_lock; 708 spinlock_t i_block_reservation_lock;
709#ifdef CONFIG_QUOTA
710 /* quota space reservation, managed internally by quota code */
711 qsize_t i_reserved_quota;
712#endif
707 713
708 /* completed async DIOs that might need unwritten extents handling */ 714 /* completed async DIOs that might need unwritten extents handling */
709 struct list_head i_aio_dio_complete_list; 715 struct list_head i_aio_dio_complete_list;
@@ -1435,7 +1441,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1435extern int ext4_block_truncate_page(handle_t *handle, 1441extern int ext4_block_truncate_page(handle_t *handle,
1436 struct address_space *mapping, loff_t from); 1442 struct address_space *mapping, loff_t from);
1437extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1443extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1438extern qsize_t ext4_get_reserved_space(struct inode *inode); 1444extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1439extern int flush_aio_dio_completed_IO(struct inode *inode); 1445extern int flush_aio_dio_completed_IO(struct inode *inode);
1440/* ioctl.c */ 1446/* ioctl.c */
1441extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1447extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..7d7b74e94687 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At ldxs**2 blocks, we need
312 * an additional index block, and at ldxs**3 blocks, yet
313 * another index blocks.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -3023,6 +3038,14 @@ out:
3023 return err; 3038 return err;
3024} 3039}
3025 3040
3041static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3042 sector_t block, int count)
3043{
3044 int i;
3045 for (i = 0; i < count; i++)
3046 unmap_underlying_metadata(bdev, block + i);
3047}
3048
3026static int 3049static int
3027ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3050ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3028 ext4_lblk_t iblock, unsigned int max_blocks, 3051 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,18 @@ out:
3098 } else 3121 } else
3099 allocated = ret; 3122 allocated = ret;
3100 set_buffer_new(bh_result); 3123 set_buffer_new(bh_result);
3124 /*
3125 * if we allocated more blocks than requested
3126 * we need to make sure we unmap the extra block
3127 * allocated. The actual needed block will get
3128 * unmapped later when we find the buffer_head marked
3129 * new.
3130 */
3131 if (allocated > max_blocks) {
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks,
3134 allocated - max_blocks);
3135 }
3101map_out: 3136map_out:
3102 set_buffer_mapped(bh_result); 3137 set_buffer_mapped(bh_result);
3103out1: 3138out1:
@@ -3190,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3225 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3226 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3227 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3228 if (path[depth].p_ext == NULL && depth != 0) {
3229 ext4_error(inode->i_sb, __func__, "bad extent address "
3230 "inode: %lu, iblock: %d, depth: %d",
3231 inode->i_ino, iblock, depth);
3232 err = -EIO;
3233 goto out2;
3234 }
3194 eh = path[depth].p_hdr; 3235 eh = path[depth].p_hdr;
3195 3236
3196 ex = path[depth].p_ext; 3237 ex = path[depth].p_ext;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
88 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
89 89
90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
91 if (jbd2_log_start_commit(journal, commit_tid)) 91 if (jbd2_log_start_commit(journal, commit_tid)) {
92 /*
93 * When the journal is on a different device than the
94 * fs data disk, we need to issue the barrier in
95 * writeback mode. (In ordered mode, the jbd2 layer
96 * will take care of issuing the barrier. In
97 * data=journal, all of the data blocks are written to
98 * the journal device.)
99 */
100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
92 jbd2_log_wait_commit(journal, commit_tid); 104 jbd2_log_wait_commit(journal, commit_tid);
93 else if (journal->j_flags & JBD2_BARRIER) 105 } else if (journal->j_flags & JBD2_BARRIER)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
95 return ret; 107 return ret;
96} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5352db1a3086..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1003,83 +1003,94 @@ out:
1003 return err; 1003 return err;
1004} 1004}
1005 1005
1006qsize_t ext4_get_reserved_space(struct inode *inode) 1006#ifdef CONFIG_QUOTA
1007qsize_t *ext4_get_reserved_space(struct inode *inode)
1007{ 1008{
1008 unsigned long long total; 1009 return &EXT4_I(inode)->i_reserved_quota;
1009
1010 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1011 total = EXT4_I(inode)->i_reserved_data_blocks +
1012 EXT4_I(inode)->i_reserved_meta_blocks;
1013 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1014
1015 return (total << inode->i_blkbits);
1016} 1010}
1011#endif
1012
1017/* 1013/*
1018 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1019 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1020 */ 1016 */
1021static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1022{ 1019{
1023 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1024 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1025 1022 int blk_bits;
1026 /* number of new indirect blocks needed */
1027 ind_blks = (blocks + icap - 1) / icap;
1028 1023
1029 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1030 1026
1031 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1032 1028
1033 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1034} 1038}
1035 1039
1036/* 1040/*
1037 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1038 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1039 */ 1043 */
1040static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1041{ 1045{
1042 if (!blocks)
1043 return 0;
1044
1045 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1046 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1047 1048
1048 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1049} 1050}
1050 1051
1052/*
1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here.
1055 */
1051static void ext4_da_update_reserve_space(struct inode *inode, int used) 1056static void ext4_da_update_reserve_space(struct inode *inode, int used)
1052{ 1057{
1053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1058 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1054 int total, mdb, mdb_free; 1059 struct ext4_inode_info *ei = EXT4_I(inode);
1055 1060 int mdb_free = 0;
1056 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1061
1057 /* recalculate the number of metablocks still need to be reserved */ 1062 spin_lock(&ei->i_block_reservation_lock);
1058 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1063 if (unlikely(used > ei->i_reserved_data_blocks)) {
1059 mdb = ext4_calc_metadata_amount(inode, total); 1064 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1060 1065 "with only %d reserved data blocks\n",
1061 /* figure out how many metablocks to release */ 1066 __func__, inode->i_ino, used,
1062 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1067 ei->i_reserved_data_blocks);
1063 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1068 WARN_ON(1);
1064 1069 used = ei->i_reserved_data_blocks;
1065 if (mdb_free) { 1070 }
1066 /* Account for allocated meta_blocks */ 1071
1067 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; 1072 /* Update per-inode reservations */
1068 1073 ei->i_reserved_data_blocks -= used;
1069 /* update fs dirty blocks counter */ 1074 used += ei->i_allocated_meta_blocks;
1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1076 ei->i_allocated_meta_blocks = 0;
1077 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1078
1079 if (ei->i_reserved_data_blocks == 0) {
1080 /*
1081 * We can release all of the reserved metadata blocks
1082 * only when we have written all of the delayed
1083 * allocation blocks.
1084 */
1085 mdb_free = ei->i_reserved_meta_blocks;
1086 ei->i_reserved_meta_blocks = 0;
1087 ei->i_da_metadata_calc_len = 0;
1070 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1088 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1071 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1072 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1073 } 1089 }
1074
1075 /* update per-inode reservations */
1076 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1077 EXT4_I(inode)->i_reserved_data_blocks -= used;
1078 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1079 1091
1080 /* 1092 /* Update quota subsystem */
1081 * free those over-booking quota for metadata blocks 1093 vfs_dq_claim_block(inode, used);
1082 */
1083 if (mdb_free) 1094 if (mdb_free)
1084 vfs_dq_release_reservation_block(inode, mdb_free); 1095 vfs_dq_release_reservation_block(inode, mdb_free);
1085 1096
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1088 * there aren't any writers on the inode, we can discard the 1099 * there aren't any writers on the inode, we can discard the
1089 * inode's preallocations. 1100 * inode's preallocations.
1090 */ 1101 */
1091 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1102 if ((ei->i_reserved_data_blocks == 0) &&
1103 (atomic_read(&inode->i_writecount) == 0))
1092 ext4_discard_preallocations(inode); 1104 ext4_discard_preallocations(inode);
1093} 1105}
1094 1106
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
1797 return ret ? ret : copied; 1809 return ret ? ret : copied;
1798} 1810}
1799 1811
1800static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1812/*
1813 * Reserve a single block located at lblock
1814 */
1815static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1801{ 1816{
1802 int retries = 0; 1817 int retries = 0;
1803 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1818 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1804 unsigned long md_needed, mdblocks, total = 0; 1819 struct ext4_inode_info *ei = EXT4_I(inode);
1820 unsigned long md_needed, md_reserved;
1805 1821
1806 /* 1822 /*
1807 * recalculate the amount of metadata blocks to reserve 1823 * recalculate the amount of metadata blocks to reserve
@@ -1809,86 +1825,90 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1809 * worse case is one extent per block 1825 * worse case is one extent per block
1810 */ 1826 */
1811repeat: 1827repeat:
1812 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1828 spin_lock(&ei->i_block_reservation_lock);
1813 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1829 md_reserved = ei->i_reserved_meta_blocks;
1814 mdblocks = ext4_calc_metadata_amount(inode, total); 1830 md_needed = ext4_calc_metadata_amount(inode, lblock);
1815 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1831 spin_unlock(&ei->i_block_reservation_lock);
1816
1817 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1818 total = md_needed + nrblocks;
1819 1832
1820 /* 1833 /*
1821 * Make quota reservation here to prevent quota overflow 1834 * Make quota reservation here to prevent quota overflow
1822 * later. Real quota accounting is done at pages writeout 1835 * later. Real quota accounting is done at pages writeout
1823 * time. 1836 * time.
1824 */ 1837 */
1825 if (vfs_dq_reserve_block(inode, total)) { 1838 if (vfs_dq_reserve_block(inode, md_needed + 1)) {
1826 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1839 /*
1840 * We tend to badly over-estimate the amount of
1841 * metadata blocks which are needed, so if we have
1842 * reserved any metadata blocks, try to force out the
1843 * inode and see if we have any better luck.
1844 */
1845 if (md_reserved && retries++ <= 3)
1846 goto retry;
1827 return -EDQUOT; 1847 return -EDQUOT;
1828 } 1848 }
1829 1849
1830 if (ext4_claim_free_blocks(sbi, total)) { 1850 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1831 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1851 vfs_dq_release_reservation_block(inode, md_needed + 1);
1832 vfs_dq_release_reservation_block(inode, total);
1833 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1852 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1853 retry:
1854 if (md_reserved)
1855 write_inode_now(inode, (retries == 3));
1834 yield(); 1856 yield();
1835 goto repeat; 1857 goto repeat;
1836 } 1858 }
1837 return -ENOSPC; 1859 return -ENOSPC;
1838 } 1860 }
1839 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1861 spin_lock(&ei->i_block_reservation_lock);
1840 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; 1862 ei->i_reserved_data_blocks++;
1863 ei->i_reserved_meta_blocks += md_needed;
1864 spin_unlock(&ei->i_block_reservation_lock);
1841 1865
1842 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1843 return 0; /* success */ 1866 return 0; /* success */
1844} 1867}
1845 1868
1846static void ext4_da_release_space(struct inode *inode, int to_free) 1869static void ext4_da_release_space(struct inode *inode, int to_free)
1847{ 1870{
1848 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1871 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1849 int total, mdb, mdb_free, release; 1872 struct ext4_inode_info *ei = EXT4_I(inode);
1850 1873
1851 if (!to_free) 1874 if (!to_free)
1852 return; /* Nothing to release, exit */ 1875 return; /* Nothing to release, exit */
1853 1876
1854 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1877 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1855 1878
1856 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1879 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1857 /* 1880 /*
1858 * if there is no reserved blocks, but we try to free some 1881 * if there aren't enough reserved blocks, then the
1859 * then the counter is messed up somewhere. 1882 * counter is messed up somewhere. Since this
1860 * but since this function is called from invalidate 1883 * function is called from invalidate page, it's
1861 * page, it's harmless to return without any action 1884 * harmless to return without any action.
1862 */ 1885 */
1863 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1886 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1864 "blocks for inode %lu, but there is no reserved " 1887 "ino %lu, to_free %d with only %d reserved "
1865 "data blocks\n", to_free, inode->i_ino); 1888 "data blocks\n", inode->i_ino, to_free,
1866 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1889 ei->i_reserved_data_blocks);
1867 return; 1890 WARN_ON(1);
1891 to_free = ei->i_reserved_data_blocks;
1868 } 1892 }
1893 ei->i_reserved_data_blocks -= to_free;
1869 1894
1870 /* recalculate the number of metablocks still need to be reserved */ 1895 if (ei->i_reserved_data_blocks == 0) {
1871 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1896 /*
1872 mdb = ext4_calc_metadata_amount(inode, total); 1897 * We can release all of the reserved metadata blocks
1873 1898 * only when we have written all of the delayed
1874 /* figure out how many metablocks to release */ 1899 * allocation blocks.
1875 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1900 */
1876 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1901 to_free += ei->i_reserved_meta_blocks;
1877 1902 ei->i_reserved_meta_blocks = 0;
1878 release = to_free + mdb_free; 1903 ei->i_da_metadata_calc_len = 0;
1879 1904 }
1880 /* update fs dirty blocks counter for truncate case */
1881 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1882 1905
1883 /* update per-inode reservations */ 1906 /* update fs dirty blocks counter */
1884 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1907 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1885 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1886 1908
1887 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1888 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1889 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1909 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1890 1910
1891 vfs_dq_release_reservation_block(inode, release); 1911 vfs_dq_release_reservation_block(inode, to_free);
1892} 1912}
1893 1913
1894static void ext4_da_page_release_reservation(struct page *page, 1914static void ext4_da_page_release_reservation(struct page *page,
@@ -2494,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2494 * XXX: __block_prepare_write() unmaps passed block, 2514 * XXX: __block_prepare_write() unmaps passed block,
2495 * is it OK? 2515 * is it OK?
2496 */ 2516 */
2497 ret = ext4_da_reserve_space(inode, 1); 2517 ret = ext4_da_reserve_space(inode, iblock);
2498 if (ret) 2518 if (ret)
2499 /* not enough space to reserve */ 2519 /* not enough space to reserve */
2500 return ret; 2520 return ret;
@@ -2968,8 +2988,7 @@ retry:
2968out_writepages: 2988out_writepages:
2969 if (!no_nrwrite_index_update) 2989 if (!no_nrwrite_index_update)
2970 wbc->no_nrwrite_index_update = 0; 2990 wbc->no_nrwrite_index_update = 0;
2971 if (wbc->nr_to_write > nr_to_writebump) 2991 wbc->nr_to_write -= nr_to_writebump;
2972 wbc->nr_to_write -= nr_to_writebump;
2973 wbc->range_start = range_start; 2992 wbc->range_start = range_start;
2974 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2993 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2975 return ret; 2994 return ret;
@@ -2994,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
2994 if (2 * free_blocks < 3 * dirty_blocks || 3013 if (2 * free_blocks < 3 * dirty_blocks ||
2995 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3014 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2996 /* 3015 /*
2997 * free block count is less that 150% of dirty blocks 3016 * free block count is less than 150% of dirty blocks
2998 * or free blocks is less that watermark 3017 * or free blocks is less than watermark
2999 */ 3018 */
3000 return 1; 3019 return 1;
3001 } 3020 }
3021 /*
3022 * Even if we don't switch but are nearing capacity,
3023 * start pushing delalloc when 1/2 of free blocks are dirty.
3024 */
3025 if (free_blocks < 2 * dirty_blocks)
3026 writeback_inodes_sb_if_idle(sb);
3027
3002 return 0; 3028 return 0;
3003} 3029}
3004 3030
@@ -4794,6 +4820,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4794 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4820 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4795 inode->i_size = ext4_isize(raw_inode); 4821 inode->i_size = ext4_isize(raw_inode);
4796 ei->i_disksize = inode->i_size; 4822 ei->i_disksize = inode->i_size;
4823#ifdef CONFIG_QUOTA
4824 ei->i_reserved_quota = 0;
4825#endif
4797 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4826 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
4798 ei->i_block_group = iloc.block_group; 4827 ei->i_block_group = iloc.block_group;
4799 ei->i_last_alloc_group = ~0; 4828 ei->i_last_alloc_group = ~0;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1fd3daadc9c..d34afad3e137 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2755,12 +2755,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2755 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2755 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2756 /* release all the reserved blocks if non delalloc */ 2756 /* release all the reserved blocks if non delalloc */
2757 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2757 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2758 else {
2759 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2760 ac->ac_b_ex.fe_len);
2761 /* convert reserved quota blocks to real quota blocks */
2762 vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
2763 }
2764 2758
2765 if (sbi->s_log_groups_per_flex) { 2759 if (sbi->s_log_groups_per_flex) {
2766 ext4_group_t flex_group = ext4_flex_group(sbi, 2760 ext4_group_t flex_group = ext4_flex_group(sbi,
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 768c111a77ec..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,8 +702,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
702 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
703 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
704 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
705 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
706 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
708#ifdef CONFIG_QUOTA
709 ei->i_reserved_quota = 0;
710#endif
707 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 711 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
708 ei->cur_aio_dio = NULL; 712 ei->cur_aio_dio = NULL;
709 ei->i_sync_tid = 0; 713 ei->i_sync_tid = 0;
@@ -1014,7 +1018,9 @@ static const struct dquot_operations ext4_quota_operations = {
1014 .reserve_space = dquot_reserve_space, 1018 .reserve_space = dquot_reserve_space,
1015 .claim_space = dquot_claim_space, 1019 .claim_space = dquot_claim_space,
1016 .release_rsv = dquot_release_reserved_space, 1020 .release_rsv = dquot_release_reserved_space,
1021#ifdef CONFIG_QUOTA
1017 .get_reserved_space = ext4_get_reserved_space, 1022 .get_reserved_space = ext4_get_reserved_space,
1023#endif
1018 .alloc_inode = dquot_alloc_inode, 1024 .alloc_inode = dquot_alloc_inode,
1019 .free_space = dquot_free_space, 1025 .free_space = dquot_free_space,
1020 .free_inode = dquot_free_inode, 1026 .free_inode = dquot_free_inode,
@@ -2137,11 +2143,8 @@ static int parse_strtoul(const char *buf,
2137{ 2143{
2138 char *endp; 2144 char *endp;
2139 2145
2140 while (*buf && isspace(*buf)) 2146 *value = simple_strtoul(skip_spaces(buf), &endp, 0);
2141 buf++; 2147 endp = skip_spaces(endp);
2142 *value = simple_strtoul(buf, &endp, 0);
2143 while (*endp && isspace(*endp))
2144 endp++;
2145 if (*endp || *value > max) 2148 if (*endp || *value > max)
2146 return -EINVAL; 2149 return -EINVAL;
2147 2150
@@ -2172,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2172 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2173 2176
2174 return snprintf(buf, PAGE_SIZE, "%llu\n", 2177 return snprintf(buf, PAGE_SIZE, "%llu\n",
2175 sbi->s_kbytes_written + 2178 (unsigned long long)(sbi->s_kbytes_written +
2176 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2177 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2178} 2181}
2179 2182
2180static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2183static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4003,6 +4006,7 @@ static inline void unregister_as_ext2(void)
4003{ 4006{
4004 unregister_filesystem(&ext2_fs_type); 4007 unregister_filesystem(&ext2_fs_type);
4005} 4008}
4009MODULE_ALIAS("ext2");
4006#else 4010#else
4007static inline void register_as_ext2(void) { } 4011static inline void register_as_ext2(void) { }
4008static inline void unregister_as_ext2(void) { } 4012static inline void unregister_as_ext2(void) { }
@@ -4029,6 +4033,7 @@ static inline void unregister_as_ext3(void)
4029{ 4033{
4030 unregister_filesystem(&ext3_fs_type); 4034 unregister_filesystem(&ext3_fs_type);
4031} 4035}
4036MODULE_ALIAS("ext3");
4032#else 4037#else
4033static inline void register_as_ext3(void) { } 4038static inline void register_as_ext3(void) { }
4034static inline void unregister_as_ext3(void) { } 4039static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 910bf9a59cb3..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer, 95static int ext4_xattr_list(struct dentry *dentry, char *buffer,
96 size_t buffer_size); 96 size_t buffer_size);
97 97
98static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
140ssize_t 140ssize_t
141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) 141ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
142{ 142{
143 return ext4_xattr_list(dentry->d_inode, buffer, size); 143 return ext4_xattr_list(dentry, buffer, size);
144} 144}
145 145
146static int 146static int
@@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
325} 325}
326 326
327static int 327static int
328ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, 328ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
329 char *buffer, size_t buffer_size) 329 char *buffer, size_t buffer_size)
330{ 330{
331 size_t rest = buffer_size; 331 size_t rest = buffer_size;
@@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
335 ext4_xattr_handler(entry->e_name_index); 335 ext4_xattr_handler(entry->e_name_index);
336 336
337 if (handler) { 337 if (handler) {
338 size_t size = handler->list(inode, buffer, rest, 338 size_t size = handler->list(dentry, buffer, rest,
339 entry->e_name, 339 entry->e_name,
340 entry->e_name_len); 340 entry->e_name_len,
341 handler->flags);
341 if (buffer) { 342 if (buffer) {
342 if (size > rest) 343 if (size > rest)
343 return -ERANGE; 344 return -ERANGE;
@@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
350} 351}
351 352
352static int 353static int
353ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) 354ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
354{ 355{
356 struct inode *inode = dentry->d_inode;
355 struct buffer_head *bh = NULL; 357 struct buffer_head *bh = NULL;
356 int error; 358 int error;
357 359
@@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
376 goto cleanup; 378 goto cleanup;
377 } 379 }
378 ext4_xattr_cache_insert(bh); 380 ext4_xattr_cache_insert(bh);
379 error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); 381 error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
380 382
381cleanup: 383cleanup:
382 brelse(bh); 384 brelse(bh);
@@ -385,8 +387,9 @@ cleanup:
385} 387}
386 388
387static int 389static int
388ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) 390ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
389{ 391{
392 struct inode *inode = dentry->d_inode;
390 struct ext4_xattr_ibody_header *header; 393 struct ext4_xattr_ibody_header *header;
391 struct ext4_inode *raw_inode; 394 struct ext4_inode *raw_inode;
392 struct ext4_iloc iloc; 395 struct ext4_iloc iloc;
@@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
404 error = ext4_xattr_check_names(IFIRST(header), end); 407 error = ext4_xattr_check_names(IFIRST(header), end);
405 if (error) 408 if (error)
406 goto cleanup; 409 goto cleanup;
407 error = ext4_xattr_list_entries(inode, IFIRST(header), 410 error = ext4_xattr_list_entries(dentry, IFIRST(header),
408 buffer, buffer_size); 411 buffer, buffer_size);
409 412
410cleanup: 413cleanup:
@@ -423,12 +426,12 @@ cleanup:
423 * used / required on success. 426 * used / required on success.
424 */ 427 */
425static int 428static int
426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 429ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
427{ 430{
428 int i_error, b_error; 431 int i_error, b_error;
429 432
430 down_read(&EXT4_I(inode)->xattr_sem); 433 down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
431 i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); 434 i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
432 if (i_error < 0) { 435 if (i_error < 0) {
433 b_error = 0; 436 b_error = 0;
434 } else { 437 } else {
@@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
436 buffer += i_error; 439 buffer += i_error;
437 buffer_size -= i_error; 440 buffer_size -= i_error;
438 } 441 }
439 b_error = ext4_xattr_block_list(inode, buffer, buffer_size); 442 b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
440 if (b_error < 0) 443 if (b_error < 0)
441 i_error = 0; 444 i_error = 0;
442 } 445 }
443 up_read(&EXT4_I(inode)->xattr_sem); 446 up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
444 return i_error + b_error; 447 return i_error + b_error;
445} 448}
446 449
@@ -1329,6 +1332,8 @@ retry:
1329 goto cleanup; 1332 goto cleanup;
1330 kfree(b_entry_name); 1333 kfree(b_entry_name);
1331 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1332 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1333 kfree(is); 1338 kfree(is);
1334 kfree(bs); 1339 kfree(bs);
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6cae..983c253999a7 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -12,8 +12,8 @@
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
15ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len, int type)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
28} 28}
29 29
30static int 30static int
31ext4_xattr_security_get(struct inode *inode, const char *name, 31ext4_xattr_security_get(struct dentry *dentry, const char *name,
32 void *buffer, size_t size) 32 void *buffer, size_t size, int type)
33{ 33{
34 if (strcmp(name, "") == 0) 34 if (strcmp(name, "") == 0)
35 return -EINVAL; 35 return -EINVAL;
36 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, 36 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
37 buffer, size); 37 name, buffer, size);
38} 38}
39 39
40static int 40static int
41ext4_xattr_security_set(struct inode *inode, const char *name, 41ext4_xattr_security_set(struct dentry *dentry, const char *name,
42 const void *value, size_t size, int flags) 42 const void *value, size_t size, int flags, int type)
43{ 43{
44 if (strcmp(name, "") == 0) 44 if (strcmp(name, "") == 0)
45 return -EINVAL; 45 return -EINVAL;
46 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, 46 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
47 value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int 50int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a37..15b50edc6587 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
14#include "xattr.h" 14#include "xattr.h"
15 15
16static size_t 16static size_t
17ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
18 const char *name, size_t name_len) 18 const char *name, size_t name_len, int type)
19{ 19{
20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
32} 32}
33 33
34static int 34static int
35ext4_xattr_trusted_get(struct inode *inode, const char *name, 35ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
36 void *buffer, size_t size) 36 size_t size, int type)
37{ 37{
38 if (strcmp(name, "") == 0) 38 if (strcmp(name, "") == 0)
39 return -EINVAL; 39 return -EINVAL;
40 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, 40 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
41 buffer, size); 41 name, buffer, size);
42} 42}
43 43
44static int 44static int
45ext4_xattr_trusted_set(struct inode *inode, const char *name, 45ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 46 const void *value, size_t size, int flags, int type)
47{ 47{
48 if (strcmp(name, "") == 0) 48 if (strcmp(name, "") == 0)
49 return -EINVAL; 49 return -EINVAL;
50 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, 50 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
51 value, size, flags); 51 name, value, size, flags);
52} 52}
53 53
54struct xattr_handler ext4_xattr_trusted_handler = { 54struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42aa..c4ce05746ce1 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
13#include "xattr.h" 13#include "xattr.h"
14 14
15static size_t 15static size_t
16ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
17 const char *name, size_t name_len) 17 const char *name, size_t name_len, int type)
18{ 18{
19 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
20 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
21 21
22 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(dentry->d_sb, XATTR_USER))
23 return 0; 23 return 0;
24 24
25 if (list && total_len <= list_size) { 25 if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
31} 31}
32 32
33static int 33static int
34ext4_xattr_user_get(struct inode *inode, const char *name, 34ext4_xattr_user_get(struct dentry *dentry, const char *name,
35 void *buffer, size_t size) 35 void *buffer, size_t size, int type)
36{ 36{
37 if (strcmp(name, "") == 0) 37 if (strcmp(name, "") == 0)
38 return -EINVAL; 38 return -EINVAL;
39 if (!test_opt(inode->i_sb, XATTR_USER)) 39 if (!test_opt(dentry->d_sb, XATTR_USER))
40 return -EOPNOTSUPP; 40 return -EOPNOTSUPP;
41 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); 41 return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
42 name, buffer, size);
42} 43}
43 44
44static int 45static int
45ext4_xattr_user_set(struct inode *inode, const char *name, 46ext4_xattr_user_set(struct dentry *dentry, const char *name,
46 const void *value, size_t size, int flags) 47 const void *value, size_t size, int flags, int type)
47{ 48{
48 if (strcmp(name, "") == 0) 49 if (strcmp(name, "") == 0)
49 return -EINVAL; 50 return -EINVAL;
50 if (!test_opt(inode->i_sb, XATTR_USER)) 51 if (!test_opt(dentry->d_sb, XATTR_USER))
51 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
52 return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, 53 return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
53 value, size, flags); 54 name, value, size, flags);
54} 55}
55 56
56struct xattr_handler ext4_xattr_user_handler = { 57struct xattr_handler ext4_xattr_user_handler = {
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7db0979c6b72..e6efdfa0f6db 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -44,7 +44,8 @@ struct fat_mount_options {
44 nocase:1, /* Does this need case conversion? 0=need case conversion*/ 44 nocase:1, /* Does this need case conversion? 0=need case conversion*/
45 usefree:1, /* Use free_clusters for FAT32 */ 45 usefree:1, /* Use free_clusters for FAT32 */
46 tz_utc:1, /* Filesystem timestamps are in UTC */ 46 tz_utc:1, /* Filesystem timestamps are in UTC */
47 rodir:1; /* allow ATTR_RO for directory */ 47 rodir:1, /* allow ATTR_RO for directory */
48 discard:1; /* Issue discard requests on deletions */
48}; 49};
49 50
50#define FAT_HASH_BITS 8 51#define FAT_HASH_BITS 8
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index a81037721a6f..81184d3b75a3 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -566,16 +566,21 @@ int fat_free_clusters(struct inode *inode, int cluster)
566 goto error; 566 goto error;
567 } 567 }
568 568
569 /* 569 if (sbi->options.discard) {
570 * Issue discard for the sectors we no longer care about, 570 /*
571 * batching contiguous clusters into one request 571 * Issue discard for the sectors we no longer
572 */ 572 * care about, batching contiguous clusters
573 if (cluster != fatent.entry + 1) { 573 * into one request
574 int nr_clus = fatent.entry - first_cl + 1; 574 */
575 575 if (cluster != fatent.entry + 1) {
576 sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), 576 int nr_clus = fatent.entry - first_cl + 1;
577 nr_clus * sbi->sec_per_clus); 577
578 first_cl = cluster; 578 sb_issue_discard(sb,
579 fat_clus_to_blknr(sbi, first_cl),
580 nr_clus * sbi->sec_per_clus);
581
582 first_cl = cluster;
583 }
579 } 584 }
580 585
581 ops->ent_put(&fatent, FAT_ENT_FREE); 586 ops->ent_put(&fatent, FAT_ENT_FREE);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 76b7961ab663..14da530b05ca 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -858,6 +858,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
858 seq_puts(m, ",errors=panic"); 858 seq_puts(m, ",errors=panic");
859 else 859 else
860 seq_puts(m, ",errors=remount-ro"); 860 seq_puts(m, ",errors=remount-ro");
861 if (opts->discard)
862 seq_puts(m, ",discard");
861 863
862 return 0; 864 return 0;
863} 865}
@@ -871,7 +873,7 @@ enum {
871 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 873 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
872 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 874 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
873 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, 875 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
874 Opt_err_panic, Opt_err_ro, Opt_err, 876 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
875}; 877};
876 878
877static const match_table_t fat_tokens = { 879static const match_table_t fat_tokens = {
@@ -899,6 +901,7 @@ static const match_table_t fat_tokens = {
899 {Opt_err_cont, "errors=continue"}, 901 {Opt_err_cont, "errors=continue"},
900 {Opt_err_panic, "errors=panic"}, 902 {Opt_err_panic, "errors=panic"},
901 {Opt_err_ro, "errors=remount-ro"}, 903 {Opt_err_ro, "errors=remount-ro"},
904 {Opt_discard, "discard"},
902 {Opt_obsolate, "conv=binary"}, 905 {Opt_obsolate, "conv=binary"},
903 {Opt_obsolate, "conv=text"}, 906 {Opt_obsolate, "conv=text"},
904 {Opt_obsolate, "conv=auto"}, 907 {Opt_obsolate, "conv=auto"},
@@ -1136,6 +1139,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1136 case Opt_rodir: 1139 case Opt_rodir:
1137 opts->rodir = 1; 1140 opts->rodir = 1;
1138 break; 1141 break;
1142 case Opt_discard:
1143 opts->discard = 1;
1144 break;
1139 1145
1140 /* obsolete mount options */ 1146 /* obsolete mount options */
1141 case Opt_obsolate: 1147 case Opt_obsolate:
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 0f55f5cb732f..d3da05f26465 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/time.h>
12#include "fat.h" 13#include "fat.h"
13 14
14/* 15/*
@@ -157,10 +158,6 @@ extern struct timezone sys_tz;
157#define SECS_PER_MIN 60 158#define SECS_PER_MIN 60
158#define SECS_PER_HOUR (60 * 60) 159#define SECS_PER_HOUR (60 * 60)
159#define SECS_PER_DAY (SECS_PER_HOUR * 24) 160#define SECS_PER_DAY (SECS_PER_HOUR * 24)
160#define UNIX_SECS_1980 315532800L
161#if BITS_PER_LONG == 64
162#define UNIX_SECS_2108 4354819200L
163#endif
164/* days between 1.1.70 and 1.1.80 (2 leap days) */ 161/* days between 1.1.70 and 1.1.80 (2 leap days) */
165#define DAYS_DELTA (365 * 10 + 2) 162#define DAYS_DELTA (365 * 10 + 2)
166/* 120 (2100 - 1980) isn't leap year */ 163/* 120 (2100 - 1980) isn't leap year */
@@ -213,58 +210,35 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
213void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, 210void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
214 __le16 *time, __le16 *date, u8 *time_cs) 211 __le16 *time, __le16 *date, u8 *time_cs)
215{ 212{
216 time_t second = ts->tv_sec; 213 struct tm tm;
217 time_t day, leap_day, month, year; 214 time_to_tm(ts->tv_sec, sbi->options.tz_utc ? 0 :
215 -sys_tz.tz_minuteswest * 60, &tm);
218 216
219 if (!sbi->options.tz_utc) 217 /* FAT can only support year between 1980 to 2107 */
220 second -= sys_tz.tz_minuteswest * SECS_PER_MIN; 218 if (tm.tm_year < 1980 - 1900) {
221
222 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
223 if (second < UNIX_SECS_1980) {
224 *time = 0; 219 *time = 0;
225 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1); 220 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1);
226 if (time_cs) 221 if (time_cs)
227 *time_cs = 0; 222 *time_cs = 0;
228 return; 223 return;
229 } 224 }
230#if BITS_PER_LONG == 64 225 if (tm.tm_year > 2107 - 1900) {
231 if (second >= UNIX_SECS_2108) {
232 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29); 226 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29);
233 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31); 227 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31);
234 if (time_cs) 228 if (time_cs)
235 *time_cs = 199; 229 *time_cs = 199;
236 return; 230 return;
237 } 231 }
238#endif
239 232
240 day = second / SECS_PER_DAY - DAYS_DELTA; 233 /* from 1900 -> from 1980 */
241 year = day / 365; 234 tm.tm_year -= 80;
242 leap_day = (year + 3) / 4; 235 /* 0~11 -> 1~12 */
243 if (year > YEAR_2100) /* 2100 isn't leap year */ 236 tm.tm_mon++;
244 leap_day--; 237 /* 0~59 -> 0~29(2sec counts) */
245 if (year * 365 + leap_day > day) 238 tm.tm_sec >>= 1;
246 year--;
247 leap_day = (year + 3) / 4;
248 if (year > YEAR_2100) /* 2100 isn't leap year */
249 leap_day--;
250 day -= year * 365 + leap_day;
251
252 if (IS_LEAP_YEAR(year) && day == days_in_year[3]) {
253 month = 2;
254 } else {
255 if (IS_LEAP_YEAR(year) && day > days_in_year[3])
256 day--;
257 for (month = 1; month < 12; month++) {
258 if (days_in_year[month + 1] > day)
259 break;
260 }
261 }
262 day -= days_in_year[month];
263 239
264 *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11 240 *time = cpu_to_le16(tm.tm_hour << 11 | tm.tm_min << 5 | tm.tm_sec);
265 | ((second / SECS_PER_MIN) % 60) << 5 241 *date = cpu_to_le16(tm.tm_year << 9 | tm.tm_mon << 5 | tm.tm_mday);
266 | (second % SECS_PER_MIN) >> 1);
267 *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1));
268 if (time_cs) 242 if (time_cs)
269 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000; 243 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000;
270} 244}
@@ -285,4 +259,3 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
285 } 259 }
286 return err; 260 return err;
287} 261}
288
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a67..97e01dc0d95f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -618,60 +618,90 @@ static DEFINE_RWLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 618static struct kmem_cache *fasync_cache __read_mostly;
619 619
620/* 620/*
621 * fasync_helper() is used by almost all character device drivers 621 * Remove a fasync entry. If successfully removed, return
622 * to set up the fasync queue. It returns negative on error, 0 if it did 622 * positive and clear the FASYNC flag. If no entry exists,
623 * no changes and positive if it added/deleted the entry. 623 * do nothing and return 0.
624 *
625 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list".
627 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
624 */ 630 */
625int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 631static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
626{ 632{
627 struct fasync_struct *fa, **fp; 633 struct fasync_struct *fa, **fp;
628 struct fasync_struct *new = NULL;
629 int result = 0; 634 int result = 0;
630 635
631 if (on) { 636 spin_lock(&filp->f_lock);
632 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 637 write_lock_irq(&fasync_lock);
633 if (!new) 638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
634 return -ENOMEM; 639 if (fa->fa_file != filp)
640 continue;
641 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa);
643 filp->f_flags &= ~FASYNC;
644 result = 1;
645 break;
635 } 646 }
647 write_unlock_irq(&fasync_lock);
648 spin_unlock(&filp->f_lock);
649 return result;
650}
651
652/*
653 * Add a fasync entry. Return negative on error, positive if
654 * added, and zero if did nothing but change an existing one.
655 *
656 * NOTE! It is very important that the FASYNC flag always
657 * match the state "is the filp on a fasync list".
658 */
659static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
660{
661 struct fasync_struct *new, *fa, **fp;
662 int result = 0;
663
664 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
665 if (!new)
666 return -ENOMEM;
636 667
637 /*
638 * We need to take f_lock first since it's not an IRQ-safe
639 * lock.
640 */
641 spin_lock(&filp->f_lock); 668 spin_lock(&filp->f_lock);
642 write_lock_irq(&fasync_lock); 669 write_lock_irq(&fasync_lock);
643 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
644 if (fa->fa_file == filp) { 671 if (fa->fa_file != filp)
645 if(on) { 672 continue;
646 fa->fa_fd = fd; 673 fa->fa_fd = fd;
647 kmem_cache_free(fasync_cache, new); 674 kmem_cache_free(fasync_cache, new);
648 } else { 675 goto out;
649 *fp = fa->fa_next;
650 kmem_cache_free(fasync_cache, fa);
651 result = 1;
652 }
653 goto out;
654 }
655 } 676 }
656 677
657 if (on) { 678 new->magic = FASYNC_MAGIC;
658 new->magic = FASYNC_MAGIC; 679 new->fa_file = filp;
659 new->fa_file = filp; 680 new->fa_fd = fd;
660 new->fa_fd = fd; 681 new->fa_next = *fapp;
661 new->fa_next = *fapp; 682 *fapp = new;
662 *fapp = new; 683 result = 1;
663 result = 1; 684 filp->f_flags |= FASYNC;
664 } 685
665out: 686out:
666 if (on)
667 filp->f_flags |= FASYNC;
668 else
669 filp->f_flags &= ~FASYNC;
670 write_unlock_irq(&fasync_lock); 687 write_unlock_irq(&fasync_lock);
671 spin_unlock(&filp->f_lock); 688 spin_unlock(&filp->f_lock);
672 return result; 689 return result;
673} 690}
674 691
692/*
693 * fasync_helper() is used by almost all character device drivers
694 * to set up the fasync queue, and for regular files by the file
695 * lease code. It returns negative on error, 0 if it did no changes
696 * and positive if it added/deleted the entry.
697 */
698int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
699{
700 if (!on)
701 return fasync_remove_entry(filp, fapp);
702 return fasync_add_entry(fd, filp, fapp);
703}
704
675EXPORT_SYMBOL(fasync_helper); 705EXPORT_SYMBOL(fasync_helper);
676 706
677void __kill_fasync(struct fasync_struct *fa, int sig, int band) 707void __kill_fasync(struct fasync_struct *fa, int sig, int band)
diff --git a/fs/file_table.c b/fs/file_table.c
index 4bef4c01ec6f..69652c5bd5f0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -21,9 +21,12 @@
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/percpu_counter.h> 23#include <linux/percpu_counter.h>
24#include <linux/ima.h>
24 25
25#include <asm/atomic.h> 26#include <asm/atomic.h>
26 27
28#include "internal.h"
29
27/* sysctl tunables... */ 30/* sysctl tunables... */
28struct files_stat_struct files_stat = { 31struct files_stat_struct files_stat = {
29 .max_files = NR_FILE 32 .max_files = NR_FILE
@@ -147,8 +150,6 @@ fail:
147 return NULL; 150 return NULL;
148} 151}
149 152
150EXPORT_SYMBOL(get_empty_filp);
151
152/** 153/**
153 * alloc_file - allocate and initialize a 'struct file' 154 * alloc_file - allocate and initialize a 'struct file'
154 * @mnt: the vfsmount on which the file will reside 155 * @mnt: the vfsmount on which the file will reside
@@ -164,8 +165,8 @@ EXPORT_SYMBOL(get_empty_filp);
164 * If all the callers of init_file() are eliminated, its 165 * If all the callers of init_file() are eliminated, its
165 * code should be moved into this function. 166 * code should be moved into this function.
166 */ 167 */
167struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, 168struct file *alloc_file(struct path *path, fmode_t mode,
168 fmode_t mode, const struct file_operations *fop) 169 const struct file_operations *fop)
169{ 170{
170 struct file *file; 171 struct file *file;
171 172
@@ -173,35 +174,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
173 if (!file) 174 if (!file)
174 return NULL; 175 return NULL;
175 176
176 init_file(file, mnt, dentry, mode, fop); 177 file->f_path = *path;
177 return file; 178 file->f_mapping = path->dentry->d_inode->i_mapping;
178}
179EXPORT_SYMBOL(alloc_file);
180
181/**
182 * init_file - initialize a 'struct file'
183 * @file: the already allocated 'struct file' to initialized
184 * @mnt: the vfsmount on which the file resides
185 * @dentry: the dentry representing this file
186 * @mode: the mode the file is opened with
187 * @fop: the 'struct file_operations' for this file
188 *
189 * Use this instead of setting the members directly. Doing so
190 * avoids making mistakes like forgetting the mntget() or
191 * forgetting to take a write on the mnt.
192 *
193 * Note: This is a crappy interface. It is here to make
194 * merging with the existing users of get_empty_filp()
195 * who have complex failure logic easier. All users
196 * of this should be moving to alloc_file().
197 */
198int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
199 fmode_t mode, const struct file_operations *fop)
200{
201 int error = 0;
202 file->f_path.dentry = dentry;
203 file->f_path.mnt = mntget(mnt);
204 file->f_mapping = dentry->d_inode->i_mapping;
205 file->f_mode = mode; 179 file->f_mode = mode;
206 file->f_op = fop; 180 file->f_op = fop;
207 181
@@ -211,14 +185,14 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
211 * visible. We do this for consistency, and so 185 * visible. We do this for consistency, and so
212 * that we can do debugging checks at __fput() 186 * that we can do debugging checks at __fput()
213 */ 187 */
214 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { 188 if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
215 file_take_write(file); 189 file_take_write(file);
216 error = mnt_clone_write(mnt); 190 WARN_ON(mnt_clone_write(path->mnt));
217 WARN_ON(error);
218 } 191 }
219 return error; 192 ima_counts_get(file);
193 return file;
220} 194}
221EXPORT_SYMBOL(init_file); 195EXPORT_SYMBOL(alloc_file);
222 196
223void fput(struct file *file) 197void fput(struct file *file)
224{ 198{
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..1a7c42c64ff4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
242/** 242/**
243 * bdi_start_writeback - start writeback 243 * bdi_start_writeback - start writeback
244 * @bdi: the backing device to write from 244 * @bdi: the backing device to write from
245 * @sb: write inodes from this super_block
245 * @nr_pages: the number of pages to write 246 * @nr_pages: the number of pages to write
246 * 247 *
247 * Description: 248 * Description:
@@ -1187,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb)
1187EXPORT_SYMBOL(writeback_inodes_sb); 1188EXPORT_SYMBOL(writeback_inodes_sb);
1188 1189
1189/** 1190/**
1191 * writeback_inodes_sb_if_idle - start writeback if none underway
1192 * @sb: the superblock
1193 *
1194 * Invoke writeback_inodes_sb if no writeback is currently underway.
1195 * Returns 1 if writeback was started, 0 if not.
1196 */
1197int writeback_inodes_sb_if_idle(struct super_block *sb)
1198{
1199 if (!writeback_in_progress(sb->s_bdi)) {
1200 writeback_inodes_sb(sb);
1201 return 1;
1202 } else
1203 return 0;
1204}
1205EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1206
1207/**
1190 * sync_inodes_sb - sync sb inode pages 1208 * sync_inodes_sb - sync sb inode pages
1191 * @sb: the superblock 1209 * @sb: the superblock
1192 * 1210 *
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index e590242fa41a..3221a0c7944e 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -91,7 +91,7 @@ EXPORT_SYMBOL(fscache_object_destroy);
91 */ 91 */
92static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) 92static struct fscache_object *fscache_objlist_lookup(loff_t *_pos)
93{ 93{
94 struct fscache_object *pobj, *obj, *minobj = NULL; 94 struct fscache_object *pobj, *obj = NULL, *minobj = NULL;
95 struct rb_node *p; 95 struct rb_node *p;
96 unsigned long pos; 96 unsigned long pos;
97 97
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index e0b53aa7bbec..55458031e501 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -1,62 +1,58 @@
1/* 1/*
2 * fs/generic_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de> 2 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 * 3 *
6 * This file is released under the GPL. 4 * This file is released under the GPL.
5 *
6 * Generic ACL support for in-memory filesystems.
7 */ 7 */
8 8
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/generic_acl.h> 11#include <linux/generic_acl.h>
12#include <linux/posix_acl.h>
13#include <linux/posix_acl_xattr.h>
12 14
13/** 15
14 * generic_acl_list - Generic xattr_handler->list() operation 16static size_t
15 * @ops: Filesystem specific getacl and setacl callbacks 17generic_acl_list(struct dentry *dentry, char *list, size_t list_size,
16 */ 18 const char *name, size_t name_len, int type)
17size_t
18generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
19 int type, char *list, size_t list_size)
20{ 19{
21 struct posix_acl *acl; 20 struct posix_acl *acl;
22 const char *name; 21 const char *xname;
23 size_t size; 22 size_t size;
24 23
25 acl = ops->getacl(inode, type); 24 acl = get_cached_acl(dentry->d_inode, type);
26 if (!acl) 25 if (!acl)
27 return 0; 26 return 0;
28 posix_acl_release(acl); 27 posix_acl_release(acl);
29 28
30 switch(type) { 29 switch (type) {
31 case ACL_TYPE_ACCESS: 30 case ACL_TYPE_ACCESS:
32 name = POSIX_ACL_XATTR_ACCESS; 31 xname = POSIX_ACL_XATTR_ACCESS;
33 break; 32 break;
34 33 case ACL_TYPE_DEFAULT:
35 case ACL_TYPE_DEFAULT: 34 xname = POSIX_ACL_XATTR_DEFAULT;
36 name = POSIX_ACL_XATTR_DEFAULT; 35 break;
37 break; 36 default:
38 37 return 0;
39 default:
40 return 0;
41 } 38 }
42 size = strlen(name) + 1; 39 size = strlen(xname) + 1;
43 if (list && size <= list_size) 40 if (list && size <= list_size)
44 memcpy(list, name, size); 41 memcpy(list, xname, size);
45 return size; 42 return size;
46} 43}
47 44
48/** 45static int
49 * generic_acl_get - Generic xattr_handler->get() operation 46generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
50 * @ops: Filesystem specific getacl and setacl callbacks 47 size_t size, int type)
51 */
52int
53generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
54 int type, void *buffer, size_t size)
55{ 48{
56 struct posix_acl *acl; 49 struct posix_acl *acl;
57 int error; 50 int error;
58 51
59 acl = ops->getacl(inode, type); 52 if (strcmp(name, "") != 0)
53 return -EINVAL;
54
55 acl = get_cached_acl(dentry->d_inode, type);
60 if (!acl) 56 if (!acl)
61 return -ENODATA; 57 return -ENODATA;
62 error = posix_acl_to_xattr(acl, buffer, size); 58 error = posix_acl_to_xattr(acl, buffer, size);
@@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
65 return error; 61 return error;
66} 62}
67 63
68/** 64static int
69 * generic_acl_set - Generic xattr_handler->set() operation 65generic_acl_set(struct dentry *dentry, const char *name, const void *value,
70 * @ops: Filesystem specific getacl and setacl callbacks 66 size_t size, int flags, int type)
71 */
72int
73generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
74 int type, const void *value, size_t size)
75{ 67{
68 struct inode *inode = dentry->d_inode;
76 struct posix_acl *acl = NULL; 69 struct posix_acl *acl = NULL;
77 int error; 70 int error;
78 71
72 if (strcmp(name, "") != 0)
73 return -EINVAL;
79 if (S_ISLNK(inode->i_mode)) 74 if (S_ISLNK(inode->i_mode))
80 return -EOPNOTSUPP; 75 return -EOPNOTSUPP;
81 if (!is_owner_or_cap(inode)) 76 if (!is_owner_or_cap(inode))
@@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
91 error = posix_acl_valid(acl); 86 error = posix_acl_valid(acl);
92 if (error) 87 if (error)
93 goto failed; 88 goto failed;
94 switch(type) { 89 switch (type) {
95 case ACL_TYPE_ACCESS: 90 case ACL_TYPE_ACCESS:
96 mode = inode->i_mode; 91 mode = inode->i_mode;
97 error = posix_acl_equiv_mode(acl, &mode); 92 error = posix_acl_equiv_mode(acl, &mode);
98 if (error < 0) 93 if (error < 0)
99 goto failed; 94 goto failed;
100 inode->i_mode = mode; 95 inode->i_mode = mode;
101 if (error == 0) { 96 if (error == 0) {
102 posix_acl_release(acl); 97 posix_acl_release(acl);
103 acl = NULL; 98 acl = NULL;
104 } 99 }
105 break; 100 break;
106 101 case ACL_TYPE_DEFAULT:
107 case ACL_TYPE_DEFAULT: 102 if (!S_ISDIR(inode->i_mode)) {
108 if (!S_ISDIR(inode->i_mode)) { 103 error = -EINVAL;
109 error = -EINVAL; 104 goto failed;
110 goto failed; 105 }
111 } 106 break;
112 break;
113 } 107 }
114 } 108 }
115 ops->setacl(inode, type, acl); 109 set_cached_acl(inode, type, acl);
116 error = 0; 110 error = 0;
117failed: 111failed:
118 posix_acl_release(acl); 112 posix_acl_release(acl);
@@ -121,14 +115,12 @@ failed:
121 115
122/** 116/**
123 * generic_acl_init - Take care of acl inheritance at @inode create time 117 * generic_acl_init - Take care of acl inheritance at @inode create time
124 * @ops: Filesystem specific getacl and setacl callbacks
125 * 118 *
126 * Files created inside a directory with a default ACL inherit the 119 * Files created inside a directory with a default ACL inherit the
127 * directory's default ACL. 120 * directory's default ACL.
128 */ 121 */
129int 122int
130generic_acl_init(struct inode *inode, struct inode *dir, 123generic_acl_init(struct inode *inode, struct inode *dir)
131 struct generic_acl_operations *ops)
132{ 124{
133 struct posix_acl *acl = NULL; 125 struct posix_acl *acl = NULL;
134 mode_t mode = inode->i_mode; 126 mode_t mode = inode->i_mode;
@@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
136 128
137 inode->i_mode = mode & ~current_umask(); 129 inode->i_mode = mode & ~current_umask();
138 if (!S_ISLNK(inode->i_mode)) 130 if (!S_ISLNK(inode->i_mode))
139 acl = ops->getacl(dir, ACL_TYPE_DEFAULT); 131 acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
140 if (acl) { 132 if (acl) {
141 struct posix_acl *clone; 133 struct posix_acl *clone;
142 134
@@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
145 error = -ENOMEM; 137 error = -ENOMEM;
146 if (!clone) 138 if (!clone)
147 goto cleanup; 139 goto cleanup;
148 ops->setacl(inode, ACL_TYPE_DEFAULT, clone); 140 set_cached_acl(inode, ACL_TYPE_DEFAULT, clone);
149 posix_acl_release(clone); 141 posix_acl_release(clone);
150 } 142 }
151 clone = posix_acl_clone(acl, GFP_KERNEL); 143 clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
156 if (error >= 0) { 148 if (error >= 0) {
157 inode->i_mode = mode; 149 inode->i_mode = mode;
158 if (error > 0) 150 if (error > 0)
159 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 151 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
160 } 152 }
161 posix_acl_release(clone); 153 posix_acl_release(clone);
162 } 154 }
@@ -169,20 +161,19 @@ cleanup:
169 161
170/** 162/**
171 * generic_acl_chmod - change the access acl of @inode upon chmod() 163 * generic_acl_chmod - change the access acl of @inode upon chmod()
172 * @ops: FIlesystem specific getacl and setacl callbacks
173 * 164 *
174 * A chmod also changes the permissions of the owner, group/mask, and 165 * A chmod also changes the permissions of the owner, group/mask, and
175 * other ACL entries. 166 * other ACL entries.
176 */ 167 */
177int 168int
178generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) 169generic_acl_chmod(struct inode *inode)
179{ 170{
180 struct posix_acl *acl, *clone; 171 struct posix_acl *acl, *clone;
181 int error = 0; 172 int error = 0;
182 173
183 if (S_ISLNK(inode->i_mode)) 174 if (S_ISLNK(inode->i_mode))
184 return -EOPNOTSUPP; 175 return -EOPNOTSUPP;
185 acl = ops->getacl(inode, ACL_TYPE_ACCESS); 176 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
186 if (acl) { 177 if (acl) {
187 clone = posix_acl_clone(acl, GFP_KERNEL); 178 clone = posix_acl_clone(acl, GFP_KERNEL);
188 posix_acl_release(acl); 179 posix_acl_release(acl);
@@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
190 return -ENOMEM; 181 return -ENOMEM;
191 error = posix_acl_chmod_masq(clone, inode->i_mode); 182 error = posix_acl_chmod_masq(clone, inode->i_mode);
192 if (!error) 183 if (!error)
193 ops->setacl(inode, ACL_TYPE_ACCESS, clone); 184 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
194 posix_acl_release(clone); 185 posix_acl_release(clone);
195 } 186 }
196 return error; 187 return error;
197} 188}
189
190int
191generic_check_acl(struct inode *inode, int mask)
192{
193 struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
194
195 if (acl) {
196 int error = posix_acl_permission(inode, acl, mask);
197 posix_acl_release(acl);
198 return error;
199 }
200 return -EAGAIN;
201}
202
203struct xattr_handler generic_acl_access_handler = {
204 .prefix = POSIX_ACL_XATTR_ACCESS,
205 .flags = ACL_TYPE_ACCESS,
206 .list = generic_acl_list,
207 .get = generic_acl_get,
208 .set = generic_acl_set,
209};
210
211struct xattr_handler generic_acl_default_handler = {
212 .prefix = POSIX_ACL_XATTR_DEFAULT,
213 .flags = ACL_TYPE_DEFAULT,
214 .list = generic_acl_list,
215 .get = generic_acl_get,
216 .set = generic_acl_set,
217};
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3eb1ea846173..87ee309d4c24 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -126,7 +126,7 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
126 error = posix_acl_to_xattr(acl, data, len); 126 error = posix_acl_to_xattr(acl, data, len);
127 if (error < 0) 127 if (error < 0)
128 goto out; 128 goto out;
129 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); 129 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
130 if (!error) 130 if (!error)
131 set_cached_acl(inode, type, acl); 131 set_cached_acl(inode, type, acl);
132out: 132out:
@@ -232,9 +232,10 @@ static int gfs2_acl_type(const char *name)
232 return -EINVAL; 232 return -EINVAL;
233} 233}
234 234
235static int gfs2_xattr_system_get(struct inode *inode, const char *name, 235static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
236 void *buffer, size_t size) 236 void *buffer, size_t size, int xtype)
237{ 237{
238 struct inode *inode = dentry->d_inode;
238 struct posix_acl *acl; 239 struct posix_acl *acl;
239 int type; 240 int type;
240 int error; 241 int error;
@@ -255,9 +256,11 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name,
255 return error; 256 return error;
256} 257}
257 258
258static int gfs2_xattr_system_set(struct inode *inode, const char *name, 259static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
259 const void *value, size_t size, int flags) 260 const void *value, size_t size, int flags,
261 int xtype)
260{ 262{
263 struct inode *inode = dentry->d_inode;
261 struct gfs2_sbd *sdp = GFS2_SB(inode); 264 struct gfs2_sbd *sdp = GFS2_SB(inode);
262 struct posix_acl *acl = NULL; 265 struct posix_acl *acl = NULL;
263 int error = 0, type; 266 int error = 0, type;
@@ -319,7 +322,7 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name,
319 } 322 }
320 323
321set_acl: 324set_acl:
322 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); 325 error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS);
323 if (!error) { 326 if (!error) {
324 if (acl) 327 if (acl)
325 set_cached_acl(inode, type, acl); 328 set_cached_acl(inode, type, acl);
@@ -334,6 +337,7 @@ out:
334 337
335struct xattr_handler gfs2_xattr_system_handler = { 338struct xattr_handler gfs2_xattr_system_handler = {
336 .prefix = XATTR_SYSTEM_PREFIX, 339 .prefix = XATTR_SYSTEM_PREFIX,
340 .flags = GFS2_EATYPE_SYS,
337 .get = gfs2_xattr_system_get, 341 .get = gfs2_xattr_system_get,
338 .set = gfs2_xattr_system_set, 342 .set = gfs2_xattr_system_set,
339}; 343};
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4eb308aa3234..a6abbae8a278 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -569,6 +569,40 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
569 return ret; 569 return ret;
570} 570}
571 571
572/**
573 * gfs2_file_aio_write - Perform a write to a file
574 * @iocb: The io context
575 * @iov: The data to write
576 * @nr_segs: Number of @iov segments
577 * @pos: The file position
578 *
579 * We have to do a lock/unlock here to refresh the inode size for
580 * O_APPEND writes, otherwise we can land up writing at the wrong
581 * offset. There is still a race, but provided the app is using its
582 * own file locking, this will make O_APPEND work as expected.
583 *
584 */
585
586static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
587 unsigned long nr_segs, loff_t pos)
588{
589 struct file *file = iocb->ki_filp;
590
591 if (file->f_flags & O_APPEND) {
592 struct dentry *dentry = file->f_dentry;
593 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
594 struct gfs2_holder gh;
595 int ret;
596
597 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
598 if (ret)
599 return ret;
600 gfs2_glock_dq_uninit(&gh);
601 }
602
603 return generic_file_aio_write(iocb, iov, nr_segs, pos);
604}
605
572#ifdef CONFIG_GFS2_FS_LOCKING_DLM 606#ifdef CONFIG_GFS2_FS_LOCKING_DLM
573 607
574/** 608/**
@@ -711,7 +745,7 @@ const struct file_operations gfs2_file_fops = {
711 .read = do_sync_read, 745 .read = do_sync_read,
712 .aio_read = generic_file_aio_read, 746 .aio_read = generic_file_aio_read,
713 .write = do_sync_write, 747 .write = do_sync_write,
714 .aio_write = generic_file_aio_write, 748 .aio_write = gfs2_file_aio_write,
715 .unlocked_ioctl = gfs2_ioctl, 749 .unlocked_ioctl = gfs2_ioctl,
716 .mmap = gfs2_mmap, 750 .mmap = gfs2_mmap,
717 .open = gfs2_open, 751 .open = gfs2_open,
@@ -741,7 +775,7 @@ const struct file_operations gfs2_file_fops_nolock = {
741 .read = do_sync_read, 775 .read = do_sync_read,
742 .aio_read = generic_file_aio_read, 776 .aio_read = generic_file_aio_read,
743 .write = do_sync_write, 777 .write = do_sync_write,
744 .aio_write = generic_file_aio_write, 778 .aio_write = gfs2_file_aio_write,
745 .unlocked_ioctl = gfs2_ioctl, 779 .unlocked_ioctl = gfs2_ioctl,
746 .mmap = gfs2_mmap, 780 .mmap = gfs2_mmap,
747 .open = gfs2_open, 781 .open = gfs2_open,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 26ba2a4c4a2d..6e220f4eee7d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code
126 * segment inside gfs2_inode_lookup code needs to get moved around. 126 * segment inside gfs2_inode_lookup code needs to get moved around.
127 * 127 *
128 * Clean up I_LOCK and I_NEW as well. 128 * Clears I_NEW as well.
129 **/ 129 **/
130 130
131void gfs2_set_iop(struct inode *inode) 131void gfs2_set_iop(struct inode *inode)
@@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
801 return err; 801 return err;
802 } 802 }
803 803
804 err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0); 804 err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
805 GFS2_EATYPE_SECURITY);
805 kfree(value); 806 kfree(value);
806 kfree(name); 807 kfree(name);
807 808
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a93d5ec..6f68a5f18eb8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -121,7 +121,7 @@ struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
121 if (aspace) { 121 if (aspace) {
122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); 122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
123 aspace->i_mapping->a_ops = &aspace_aops; 123 aspace->i_mapping->a_ops = &aspace_aops;
124 aspace->i_size = ~0ULL; 124 aspace->i_size = MAX_LFS_FILESIZE;
125 ip = GFS2_I(aspace); 125 ip = GFS2_I(aspace);
126 clear_bit(GIF_USER, &ip->i_flags); 126 clear_bit(GIF_USER, &ip->i_flags);
127 insert_inode_hash(aspace); 127 insert_inode_hash(aspace);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 247436c10deb..78f73ca1ef3e 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -748,7 +748,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
748 struct gfs2_rgrpd *nrgd; 748 struct gfs2_rgrpd *nrgd;
749 unsigned int num_gh; 749 unsigned int num_gh;
750 int dir_rename = 0; 750 int dir_rename = 0;
751 int alloc_required; 751 int alloc_required = 0;
752 unsigned int x; 752 unsigned int x;
753 int error; 753 int error;
754 754
@@ -867,7 +867,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
867 goto out_gunlock; 867 goto out_gunlock;
868 } 868 }
869 869
870 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); 870 if (nip == NULL)
871 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
872 error = alloc_required;
871 if (error < 0) 873 if (error < 0)
872 goto out_gunlock; 874 goto out_gunlock;
873 error = 0; 875 error = 0;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index c5dad1eb7b91..0dc34621f6a6 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -85,11 +85,7 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
85 buf[0] = '\0'; 85 buf[0] = '\0';
86 if (!gfs2_uuid_valid(uuid)) 86 if (!gfs2_uuid_valid(uuid))
87 return 0; 87 return 0;
88 return snprintf(buf, PAGE_SIZE, "%02X%02X%02X%02X-%02X%02X-" 88 return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
89 "%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
90 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5],
91 uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11],
92 uuid[12], uuid[13], uuid[14], uuid[15]);
93} 89}
94 90
95static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 91static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -575,14 +571,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
575 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 571 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
576 if (!sdp->sd_args.ar_spectator) 572 if (!sdp->sd_args.ar_spectator)
577 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); 573 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
578 if (gfs2_uuid_valid(uuid)) { 574 if (gfs2_uuid_valid(uuid))
579 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-" 575 add_uevent_var(env, "UUID=%pUB", uuid);
580 "%02X%02X-%02X%02X%02X%02X%02X%02X",
581 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4],
582 uuid[5], uuid[6], uuid[7], uuid[8], uuid[9],
583 uuid[10], uuid[11], uuid[12], uuid[13],
584 uuid[14], uuid[15]);
585 }
586 return 0; 576 return 0;
587} 577}
588 578
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 912f5cbc4740..c2ebdf2c01d4 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -567,18 +567,17 @@ out:
567/** 567/**
568 * gfs2_xattr_get - Get a GFS2 extended attribute 568 * gfs2_xattr_get - Get a GFS2 extended attribute
569 * @inode: The inode 569 * @inode: The inode
570 * @type: The type of extended attribute
571 * @name: The name of the extended attribute 570 * @name: The name of the extended attribute
572 * @buffer: The buffer to write the result into 571 * @buffer: The buffer to write the result into
573 * @size: The size of the buffer 572 * @size: The size of the buffer
573 * @type: The type of extended attribute
574 * 574 *
575 * Returns: actual size of data on success, -errno on error 575 * Returns: actual size of data on success, -errno on error
576 */ 576 */
577 577static int gfs2_xattr_get(struct dentry *dentry, const char *name,
578int gfs2_xattr_get(struct inode *inode, int type, const char *name, 578 void *buffer, size_t size, int type)
579 void *buffer, size_t size)
580{ 579{
581 struct gfs2_inode *ip = GFS2_I(inode); 580 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
582 struct gfs2_ea_location el; 581 struct gfs2_ea_location el;
583 int error; 582 int error;
584 583
@@ -1119,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1119 1118
1120/** 1119/**
1121 * gfs2_xattr_remove - Remove a GFS2 extended attribute 1120 * gfs2_xattr_remove - Remove a GFS2 extended attribute
1122 * @inode: The inode 1121 * @ip: The inode
1123 * @type: The type of the extended attribute 1122 * @type: The type of the extended attribute
1124 * @name: The name of the extended attribute 1123 * @name: The name of the extended attribute
1125 * 1124 *
@@ -1130,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1130 * Returns: 0, or errno on failure 1129 * Returns: 0, or errno on failure
1131 */ 1130 */
1132 1131
1133static int gfs2_xattr_remove(struct inode *inode, int type, const char *name) 1132static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
1134{ 1133{
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_ea_location el; 1134 struct gfs2_ea_location el;
1137 int error; 1135 int error;
1138 1136
@@ -1156,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
1156} 1154}
1157 1155
1158/** 1156/**
1159 * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute 1157 * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
1160 * @inode: The inode 1158 * @ip: The inode
1161 * @type: The type of the extended attribute
1162 * @name: The name of the extended attribute 1159 * @name: The name of the extended attribute
1163 * @value: The value of the extended attribute (NULL for remove) 1160 * @value: The value of the extended attribute (NULL for remove)
1164 * @size: The size of the @value argument 1161 * @size: The size of the @value argument
1165 * @flags: Create or Replace 1162 * @flags: Create or Replace
1163 * @type: The type of the extended attribute
1166 * 1164 *
1167 * See gfs2_xattr_remove() for details of the removal of xattrs. 1165 * See gfs2_xattr_remove() for details of the removal of xattrs.
1168 * 1166 *
1169 * Returns: 0 or errno on failure 1167 * Returns: 0 or errno on failure
1170 */ 1168 */
1171 1169
1172int gfs2_xattr_set(struct inode *inode, int type, const char *name, 1170int __gfs2_xattr_set(struct inode *inode, const char *name,
1173 const void *value, size_t size, int flags) 1171 const void *value, size_t size, int flags, int type)
1174{ 1172{
1175 struct gfs2_sbd *sdp = GFS2_SB(inode);
1176 struct gfs2_inode *ip = GFS2_I(inode); 1173 struct gfs2_inode *ip = GFS2_I(inode);
1174 struct gfs2_sbd *sdp = GFS2_SB(inode);
1177 struct gfs2_ea_location el; 1175 struct gfs2_ea_location el;
1178 unsigned int namel = strlen(name); 1176 unsigned int namel = strlen(name);
1179 int error; 1177 int error;
@@ -1184,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1184 return -ERANGE; 1182 return -ERANGE;
1185 1183
1186 if (value == NULL) 1184 if (value == NULL)
1187 return gfs2_xattr_remove(inode, type, name); 1185 return gfs2_xattr_remove(ip, type, name);
1188 1186
1189 if (ea_check_size(sdp, namel, size)) 1187 if (ea_check_size(sdp, namel, size))
1190 return -ERANGE; 1188 return -ERANGE;
@@ -1224,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1224 return error; 1222 return error;
1225} 1223}
1226 1224
1225static int gfs2_xattr_set(struct dentry *dentry, const char *name,
1226 const void *value, size_t size, int flags, int type)
1227{
1228 return __gfs2_xattr_set(dentry->d_inode, name, value,
1229 size, flags, type);
1230}
1231
1227static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, 1232static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1228 struct gfs2_ea_header *ea, char *data) 1233 struct gfs2_ea_header *ea, char *data)
1229{ 1234{
@@ -1291,6 +1296,7 @@ fail:
1291 1296
1292int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1293{ 1298{
1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1294 struct gfs2_ea_location el; 1300 struct gfs2_ea_location el;
1295 struct buffer_head *dibh; 1301 struct buffer_head *dibh;
1296 int error; 1302 int error;
@@ -1300,16 +1306,17 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1300 return error; 1306 return error;
1301 1307
1302 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1308 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1303 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); 1309 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1304 if (error) 1310 if (error == 0) {
1305 return error; 1311 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
1306 1312 memcpy(GFS2_EA2DATA(el.el_ea), data,
1307 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1313 GFS2_EA_DATA_LEN(el.el_ea));
1308 memcpy(GFS2_EA2DATA(el.el_ea), data, 1314 }
1309 GFS2_EA_DATA_LEN(el.el_ea)); 1315 } else {
1310 } else
1311 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data); 1316 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
1317 }
1312 1318
1319 brelse(el.el_bh);
1313 if (error) 1320 if (error)
1314 return error; 1321 return error;
1315 1322
@@ -1322,8 +1329,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1322 brelse(dibh); 1329 brelse(dibh);
1323 } 1330 }
1324 1331
1325 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1332 gfs2_trans_end(sdp);
1326
1327 return error; 1333 return error;
1328} 1334}
1329 1335
@@ -1529,40 +1535,18 @@ out_alloc:
1529 return error; 1535 return error;
1530} 1536}
1531 1537
1532static int gfs2_xattr_user_get(struct inode *inode, const char *name,
1533 void *buffer, size_t size)
1534{
1535 return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
1536}
1537
1538static int gfs2_xattr_user_set(struct inode *inode, const char *name,
1539 const void *value, size_t size, int flags)
1540{
1541 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
1542}
1543
1544static int gfs2_xattr_security_get(struct inode *inode, const char *name,
1545 void *buffer, size_t size)
1546{
1547 return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
1548}
1549
1550static int gfs2_xattr_security_set(struct inode *inode, const char *name,
1551 const void *value, size_t size, int flags)
1552{
1553 return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
1554}
1555
1556static struct xattr_handler gfs2_xattr_user_handler = { 1538static struct xattr_handler gfs2_xattr_user_handler = {
1557 .prefix = XATTR_USER_PREFIX, 1539 .prefix = XATTR_USER_PREFIX,
1558 .get = gfs2_xattr_user_get, 1540 .flags = GFS2_EATYPE_USR,
1559 .set = gfs2_xattr_user_set, 1541 .get = gfs2_xattr_get,
1542 .set = gfs2_xattr_set,
1560}; 1543};
1561 1544
1562static struct xattr_handler gfs2_xattr_security_handler = { 1545static struct xattr_handler gfs2_xattr_security_handler = {
1563 .prefix = XATTR_SECURITY_PREFIX, 1546 .prefix = XATTR_SECURITY_PREFIX,
1564 .get = gfs2_xattr_security_get, 1547 .flags = GFS2_EATYPE_SECURITY,
1565 .set = gfs2_xattr_security_set, 1548 .get = gfs2_xattr_get,
1549 .set = gfs2_xattr_set,
1566}; 1550};
1567 1551
1568struct xattr_handler *gfs2_xattr_handlers[] = { 1552struct xattr_handler *gfs2_xattr_handlers[] = {
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index 8d6ae5813c4d..d392f8358f2f 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -53,10 +53,9 @@ struct gfs2_ea_location {
53 struct gfs2_ea_header *el_prev; 53 struct gfs2_ea_header *el_prev;
54}; 54};
55 55
56extern int gfs2_xattr_get(struct inode *inode, int type, const char *name, 56extern int __gfs2_xattr_set(struct inode *inode, const char *name,
57 void *buffer, size_t size); 57 const void *value, size_t size,
58extern int gfs2_xattr_set(struct inode *inode, int type, const char *name, 58 int flags, int type);
59 const void *value, size_t size, int flags);
60extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); 59extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
61extern int gfs2_ea_dealloc(struct gfs2_inode *ip); 60extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
62 61
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index 6d98f116ca03..424b0337f524 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -289,6 +289,10 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, struct qstr *src_name,
289 err = hfs_brec_find(&src_fd); 289 err = hfs_brec_find(&src_fd);
290 if (err) 290 if (err)
291 goto out; 291 goto out;
292 if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
293 err = -EIO;
294 goto out;
295 }
292 296
293 hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, 297 hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
294 src_fd.entrylength); 298 src_fd.entrylength);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 7c69b98a2e45..2b3b8611b41b 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -79,6 +79,11 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
79 filp->f_pos++; 79 filp->f_pos++;
80 /* fall through */ 80 /* fall through */
81 case 1: 81 case 1:
82 if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
83 err = -EIO;
84 goto out;
85 }
86
82 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 87 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
83 if (entry.type != HFS_CDR_THD) { 88 if (entry.type != HFS_CDR_THD) {
84 printk(KERN_ERR "hfs: bad catalog folder thread\n"); 89 printk(KERN_ERR "hfs: bad catalog folder thread\n");
@@ -109,6 +114,12 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
109 err = -EIO; 114 err = -EIO;
110 goto out; 115 goto out;
111 } 116 }
117
118 if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
119 err = -EIO;
120 goto out;
121 }
122
112 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); 123 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
113 type = entry.type; 124 type = entry.type;
114 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName); 125 len = hfs_mac2asc(sb, strbuf, &fd.key->cat.CName);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index f7fcbe49da72..5ed7252b7b23 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -409,8 +409,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
409 /* try to get the root inode */ 409 /* try to get the root inode */
410 hfs_find_init(HFS_SB(sb)->cat_tree, &fd); 410 hfs_find_init(HFS_SB(sb)->cat_tree, &fd);
411 res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd); 411 res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd);
412 if (!res) 412 if (!res) {
413 if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) {
414 res = -EIO;
415 goto bail;
416 }
413 hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength); 417 hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength);
418 }
414 if (res) { 419 if (res) {
415 hfs_find_exit(&fd); 420 hfs_find_exit(&fd);
416 goto bail_no_root; 421 goto bail_no_root;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f2feaa06bf26..cadc4ce48656 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -14,6 +14,7 @@
14#include <linux/magic.h> 14#include <linux/magic.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
17#include <linux/bitmap.h>
17 18
18/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 19/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
19 20
@@ -115,15 +116,13 @@ static void hpfs_put_super(struct super_block *s)
115unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) 116unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
116{ 117{
117 struct quad_buffer_head qbh; 118 struct quad_buffer_head qbh;
118 unsigned *bits; 119 unsigned long *bits;
119 unsigned i, count; 120 unsigned count;
120 if (!(bits = hpfs_map_4sectors(s, secno, &qbh, 4))) return 0; 121
121 count = 0; 122 bits = hpfs_map_4sectors(s, secno, &qbh, 4);
122 for (i = 0; i < 2048 / sizeof(unsigned); i++) { 123 if (!bits)
123 unsigned b; 124 return 0;
124 if (!bits[i]) continue; 125 count = bitmap_weight(bits, 2048 * BITS_PER_BYTE);
125 for (b = bits[i]; b; b>>=1) count += b & 1;
126 }
127 hpfs_brelse4(&qbh); 126 hpfs_brelse4(&qbh);
128 return count; 127 return count;
129} 128}
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67a..7239efc690d8 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
646static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 646static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
647 int buflen) 647 int buflen)
648{ 648{
649 struct dentry *proc_dentry; 649 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
650
651 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
652 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, 650 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
653 buflen); 651 buflen);
654} 652}
655 653
656static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 654static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
657{ 655{
658 struct dentry *proc_dentry; 656 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
659
660 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
661 657
662 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 658 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
663} 659}
664 660
661static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
662 void *cookie)
663{
664 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
665
666 if (proc_dentry->d_inode->i_op->put_link)
667 proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
668}
669
665static const struct inode_operations hppfs_dir_iops = { 670static const struct inode_operations hppfs_dir_iops = {
666 .lookup = hppfs_lookup, 671 .lookup = hppfs_lookup,
667}; 672};
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
669static const struct inode_operations hppfs_link_iops = { 674static const struct inode_operations hppfs_link_iops = {
670 .readlink = hppfs_readlink, 675 .readlink = hppfs_readlink,
671 .follow_link = hppfs_follow_link, 676 .follow_link = hppfs_follow_link,
677 .put_link = hppfs_put_link,
672}; 678};
673 679
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 680static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87a1258953b8..a0bbd3d1b41a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,7 +30,6 @@
30#include <linux/dnotify.h> 30#include <linux/dnotify.h>
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/ima.h>
34#include <linux/magic.h> 33#include <linux/magic.h>
35 34
36#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -922,7 +921,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
922 int error = -ENOMEM; 921 int error = -ENOMEM;
923 struct file *file; 922 struct file *file;
924 struct inode *inode; 923 struct inode *inode;
925 struct dentry *dentry, *root; 924 struct path path;
925 struct dentry *root;
926 struct qstr quick_string; 926 struct qstr quick_string;
927 927
928 *user = NULL; 928 *user = NULL;
@@ -944,10 +944,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
944 quick_string.name = name; 944 quick_string.name = name;
945 quick_string.len = strlen(quick_string.name); 945 quick_string.len = strlen(quick_string.name);
946 quick_string.hash = 0; 946 quick_string.hash = 0;
947 dentry = d_alloc(root, &quick_string); 947 path.dentry = d_alloc(root, &quick_string);
948 if (!dentry) 948 if (!path.dentry)
949 goto out_shm_unlock; 949 goto out_shm_unlock;
950 950
951 path.mnt = mntget(hugetlbfs_vfsmount);
951 error = -ENOSPC; 952 error = -ENOSPC;
952 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), 953 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
953 current_fsgid(), S_IFREG | S_IRWXUGO, 0); 954 current_fsgid(), S_IFREG | S_IRWXUGO, 0);
@@ -960,24 +961,22 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
960 acctflag)) 961 acctflag))
961 goto out_inode; 962 goto out_inode;
962 963
963 d_instantiate(dentry, inode); 964 d_instantiate(path.dentry, inode);
964 inode->i_size = size; 965 inode->i_size = size;
965 inode->i_nlink = 0; 966 inode->i_nlink = 0;
966 967
967 error = -ENFILE; 968 error = -ENFILE;
968 file = alloc_file(hugetlbfs_vfsmount, dentry, 969 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
969 FMODE_WRITE | FMODE_READ,
970 &hugetlbfs_file_operations); 970 &hugetlbfs_file_operations);
971 if (!file) 971 if (!file)
972 goto out_dentry; /* inode is already attached */ 972 goto out_dentry; /* inode is already attached */
973 ima_counts_get(file);
974 973
975 return file; 974 return file;
976 975
977out_inode: 976out_inode:
978 iput(inode); 977 iput(inode);
979out_dentry: 978out_dentry:
980 dput(dentry); 979 path_put(&path);
981out_shm_unlock: 980out_shm_unlock:
982 if (*user) { 981 if (*user) {
983 user_shm_unlock(size, *user); 982 user_shm_unlock(size, *user);
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de611..03dfeb2e3928 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
113 * Prevent speculative execution through spin_unlock(&inode_lock); 113 * Prevent speculative execution through spin_unlock(&inode_lock);
114 */ 114 */
115 smp_mb(); 115 smp_mb();
116 wake_up_bit(&inode->i_state, __I_LOCK); 116 wake_up_bit(&inode->i_state, __I_NEW);
117} 117}
118 118
119/** 119/**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
690 } 690 }
691#endif 691#endif
692 /* 692 /*
693 * This is special! We do not need the spinlock when clearing I_LOCK, 693 * This is special! We do not need the spinlock when clearing I_NEW,
694 * because we're guaranteed that nobody else tries to do anything about 694 * because we're guaranteed that nobody else tries to do anything about
695 * the state of the inode when it is locked, as we just created it (so 695 * the state of the inode when it is locked, as we just created it (so
696 * there can be no old holders that haven't tested I_LOCK). 696 * there can be no old holders that haven't tested I_NEW).
697 * However we must emit the memory barrier so that other CPUs reliably 697 * However we must emit the memory barrier so that other CPUs reliably
698 * see the clearing of I_LOCK after the other inode initialisation has 698 * see the clearing of I_NEW after the other inode initialisation has
699 * completed. 699 * completed.
700 */ 700 */
701 smp_mb(); 701 smp_mb();
702 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 702 WARN_ON(!(inode->i_state & I_NEW));
703 inode->i_state &= ~(I_LOCK|I_NEW); 703 inode->i_state &= ~I_NEW;
704 wake_up_inode(inode); 704 wake_up_inode(inode);
705} 705}
706EXPORT_SYMBOL(unlock_new_inode); 706EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
731 goto set_failed; 731 goto set_failed;
732 732
733 __inode_add_to_lists(sb, head, inode); 733 __inode_add_to_lists(sb, head, inode);
734 inode->i_state = I_LOCK|I_NEW; 734 inode->i_state = I_NEW;
735 spin_unlock(&inode_lock); 735 spin_unlock(&inode_lock);
736 736
737 /* Return the locked inode with I_NEW set, the 737 /* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
778 if (!old) { 778 if (!old) {
779 inode->i_ino = ino; 779 inode->i_ino = ino;
780 __inode_add_to_lists(sb, head, inode); 780 __inode_add_to_lists(sb, head, inode);
781 inode->i_state = I_LOCK|I_NEW; 781 inode->i_state = I_NEW;
782 spin_unlock(&inode_lock); 782 spin_unlock(&inode_lock);
783 783
784 /* Return the locked inode with I_NEW set, the 784 /* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
1083 ino_t ino = inode->i_ino; 1083 ino_t ino = inode->i_ino;
1084 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1084 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1085 1085
1086 inode->i_state |= I_LOCK|I_NEW; 1086 inode->i_state |= I_NEW;
1087 while (1) { 1087 while (1) {
1088 struct hlist_node *node; 1088 struct hlist_node *node;
1089 struct inode *old = NULL; 1089 struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1120 struct super_block *sb = inode->i_sb; 1120 struct super_block *sb = inode->i_sb;
1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1122 1122
1123 inode->i_state |= I_LOCK|I_NEW; 1123 inode->i_state |= I_NEW;
1124 1124
1125 while (1) { 1125 while (1) {
1126 struct hlist_node *node; 1126 struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
1510 * until the deletion _might_ have completed. Callers are responsible 1510 * until the deletion _might_ have completed. Callers are responsible
1511 * to recheck inode state. 1511 * to recheck inode state.
1512 * 1512 *
1513 * It doesn't matter if I_LOCK is not set initially, a call to 1513 * It doesn't matter if I_NEW is not set initially, a call to
1514 * wake_up_inode() after removing from the hash list will DTRT. 1514 * wake_up_inode() after removing from the hash list will DTRT.
1515 * 1515 *
1516 * This is called with inode_lock held. 1516 * This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
1518static void __wait_on_freeing_inode(struct inode *inode) 1518static void __wait_on_freeing_inode(struct inode *inode)
1519{ 1519{
1520 wait_queue_head_t *wq; 1520 wait_queue_head_t *wq;
1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); 1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1522 wq = bit_waitqueue(&inode->i_state, __I_LOCK); 1522 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1524 spin_unlock(&inode_lock); 1524 spin_unlock(&inode_lock);
1525 schedule(); 1525 schedule();
diff --git a/fs/internal.h b/fs/internal.h
index 515175b8b72e..e96a1667d749 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -79,8 +79,16 @@ extern void chroot_fs_refs(struct path *, struct path *);
79 * file_table.c 79 * file_table.c
80 */ 80 */
81extern void mark_files_ro(struct super_block *); 81extern void mark_files_ro(struct super_block *);
82extern struct file *get_empty_filp(void);
82 83
83/* 84/*
84 * super.c 85 * super.c
85 */ 86 */
86extern int do_remount_sb(struct super_block *, int, void *, int); 87extern int do_remount_sb(struct super_block *, int, void *, int);
88
89/*
90 * open.c
91 */
92struct nameidata;
93extern struct file *nameidata_to_filp(struct nameidata *);
94extern void release_open_intent(struct nameidata *);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a30593ba9..ed752cb38474 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * The following files are helpful: 10 * The following files are helpful:
11 * 11 *
12 * Documentation/filesystems/Exporting 12 * Documentation/filesystems/nfs/Exporting
13 * fs/exportfs/expfs.c. 13 * fs/exportfs/expfs.c.
14 */ 14 */
15 15
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 4160afad6d00..bd224eec9b07 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1913,7 +1913,7 @@ static void __init jbd_create_debugfs_entry(void)
1913{ 1913{
1914 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL); 1914 jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
1915 if (jbd_debugfs_dir) 1915 if (jbd_debugfs_dir)
1916 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO, 1916 jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
1917 jbd_debugfs_dir, 1917 jbd_debugfs_dir,
1918 &journal_enable_debug); 1918 &journal_enable_debug);
1919} 1919}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
25#include <trace/events/jbd2.h> 26#include <trace/events/jbd2.h>
26 27
27/* 28/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
515 journal->j_tail_sequence = first_tid; 516 journal->j_tail_sequence = first_tid;
516 journal->j_tail = blocknr; 517 journal->j_tail = blocknr;
517 spin_unlock(&journal->j_state_lock); 518 spin_unlock(&journal->j_state_lock);
519
520 /*
521 * If there is an external journal, we need to make sure that
522 * any data blocks that were recently written out --- perhaps
523 * by jbd2_log_do_checkpoint() --- are flushed out before we
524 * drop the transactions from the external journal. It's
525 * unlikely this will be necessary, especially with a
526 * appropriately sized journal, but we need this to guarantee
527 * correctness. Fortunately jbd2_cleanup_journal_tail()
528 * doesn't get called all that often.
529 */
530 if ((journal->j_fs_dev != journal->j_dev) &&
531 (journal->j_flags & JBD2_BARRIER))
532 blkdev_issue_flush(journal->j_fs_dev, NULL);
518 if (!(journal->j_flags & JBD2_ABORT)) 533 if (!(journal->j_flags & JBD2_ABORT))
519 jbd2_journal_update_superblock(journal, 1); 534 jbd2_journal_update_superblock(journal, 1);
520 return 0; 535 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
259 ret = err; 259 ret = err;
260 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 J_ASSERT(jinode->i_transaction == commit_transaction); 261 J_ASSERT(jinode->i_transaction == commit_transaction);
262 commit_transaction->t_flushed_data_blocks = 1;
262 jinode->i_flags &= ~JI_COMMIT_RUNNING; 263 jinode->i_flags &= ~JI_COMMIT_RUNNING;
263 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 264 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
264 } 265 }
@@ -708,8 +709,17 @@ start_journal_io:
708 } 709 }
709 } 710 }
710 711
711 /* Done it all: now write the commit record asynchronously. */ 712 /*
713 * If the journal is not located on the file system device,
714 * then we must flush the file system device before we issue
715 * the commit record
716 */
717 if (commit_transaction->t_flushed_data_blocks &&
718 (journal->j_fs_dev != journal->j_dev) &&
719 (journal->j_flags & JBD2_BARRIER))
720 blkdev_issue_flush(journal->j_fs_dev, NULL);
712 721
722 /* Done it all: now write the commit record asynchronously. */
713 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 723 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
714 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 724 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
715 err = journal_submit_commit_record(journal, commit_transaction, 725 err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
720 blkdev_issue_flush(journal->j_dev, NULL); 730 blkdev_issue_flush(journal->j_dev, NULL);
721 } 731 }
722 732
723 /*
724 * This is the right place to wait for data buffers both for ASYNC
725 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
726 * the commit block went to disk (which happens above). If commit is
727 * SYNC, we need to wait for data buffers before we start writing
728 * commit block, which happens below in such setting.
729 */
730 err = journal_finish_inode_data_buffers(journal, commit_transaction); 733 err = journal_finish_inode_data_buffers(journal, commit_transaction);
731 if (err) { 734 if (err) {
732 printk(KERN_WARNING 735 printk(KERN_WARNING
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b7ca3a92a4db..ac0d027595d0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -814,7 +814,7 @@ static journal_t * journal_init_common (void)
814 journal_t *journal; 814 journal_t *journal;
815 int err; 815 int err;
816 816
817 journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); 817 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
818 if (!journal) 818 if (!journal)
819 goto fail; 819 goto fail;
820 820
@@ -2115,7 +2115,8 @@ static void __init jbd2_create_debugfs_entry(void)
2115{ 2115{
2116 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL); 2116 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
2117 if (jbd2_debugfs_dir) 2117 if (jbd2_debugfs_dir)
2118 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO, 2118 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
2119 S_IRUGO | S_IWUSR,
2119 jbd2_debugfs_dir, 2120 jbd2_debugfs_dir,
2120 &jbd2_journal_enable_debug); 2121 &jbd2_journal_enable_debug);
2121} 2122}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7edb62e97419..7cdc3196476a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode)
350 return rc; 350 return rc;
351} 351}
352 352
353static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size, 353static size_t jffs2_acl_access_listxattr(struct dentry *dentry, char *list,
354 const char *name, size_t name_len) 354 size_t list_size, const char *name, size_t name_len, int type)
355{ 355{
356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS); 356 const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
357 357
@@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t
360 return retlen; 360 return retlen;
361} 361}
362 362
363static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size, 363static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list,
364 const char *name, size_t name_len) 364 size_t list_size, const char *name, size_t name_len, int type)
365{ 365{
366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT); 366 const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
367 367
@@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_
370 return retlen; 370 return retlen;
371} 371}
372 372
373static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size) 373static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
374 void *buffer, size_t size, int type)
374{ 375{
375 struct posix_acl *acl; 376 struct posix_acl *acl;
376 int rc; 377 int rc;
377 378
378 acl = jffs2_get_acl(inode, type); 379 if (name[0] != '\0')
380 return -EINVAL;
381
382 acl = jffs2_get_acl(dentry->d_inode, type);
379 if (IS_ERR(acl)) 383 if (IS_ERR(acl))
380 return PTR_ERR(acl); 384 return PTR_ERR(acl);
381 if (!acl) 385 if (!acl)
@@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_
386 return rc; 390 return rc;
387} 391}
388 392
389static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) 393static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
390{ 394 const void *value, size_t size, int flags, int type)
391 if (name[0] != '\0')
392 return -EINVAL;
393 return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
394}
395
396static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
397{
398 if (name[0] != '\0')
399 return -EINVAL;
400 return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
401}
402
403static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
404{ 395{
405 struct posix_acl *acl; 396 struct posix_acl *acl;
406 int rc; 397 int rc;
407 398
408 if (!is_owner_or_cap(inode)) 399 if (name[0] != '\0')
400 return -EINVAL;
401 if (!is_owner_or_cap(dentry->d_inode))
409 return -EPERM; 402 return -EPERM;
410 403
411 if (value) { 404 if (value) {
@@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value,
420 } else { 413 } else {
421 acl = NULL; 414 acl = NULL;
422 } 415 }
423 rc = jffs2_set_acl(inode, type, acl); 416 rc = jffs2_set_acl(dentry->d_inode, type, acl);
424 out: 417 out:
425 posix_acl_release(acl); 418 posix_acl_release(acl);
426 return rc; 419 return rc;
427} 420}
428 421
429static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
430 const void *buffer, size_t size, int flags)
431{
432 if (name[0] != '\0')
433 return -EINVAL;
434 return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
435}
436
437static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
438 const void *buffer, size_t size, int flags)
439{
440 if (name[0] != '\0')
441 return -EINVAL;
442 return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
443}
444
445struct xattr_handler jffs2_acl_access_xattr_handler = { 422struct xattr_handler jffs2_acl_access_xattr_handler = {
446 .prefix = POSIX_ACL_XATTR_ACCESS, 423 .prefix = POSIX_ACL_XATTR_ACCESS,
424 .flags = ACL_TYPE_ACCESS, /* was ACL_TYPE_DEFAULT; the removed access wrappers passed ACL_TYPE_ACCESS */
447 .list = jffs2_acl_access_listxattr, 425 .list = jffs2_acl_access_listxattr,
448 .get = jffs2_acl_access_getxattr, 426 .get = jffs2_acl_getxattr,
449 .set = jffs2_acl_access_setxattr, 427 .set = jffs2_acl_setxattr,
450}; 428};
451 429
452struct xattr_handler jffs2_acl_default_xattr_handler = { 430struct xattr_handler jffs2_acl_default_xattr_handler = {
453 .prefix = POSIX_ACL_XATTR_DEFAULT, 431 .prefix = POSIX_ACL_XATTR_DEFAULT,
432 .flags = ACL_TYPE_DEFAULT,
454 .list = jffs2_acl_default_listxattr, 433 .list = jffs2_acl_default_listxattr,
455 .get = jffs2_acl_default_getxattr, 434 .get = jffs2_acl_getxattr,
456 .set = jffs2_acl_default_setxattr, 435 .set = jffs2_acl_setxattr,
457}; 436};
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 090c556ffed2..3b6f2fa12cff 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -700,7 +700,8 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
700 struct jffs2_raw_inode ri; 700 struct jffs2_raw_inode ri;
701 struct jffs2_node_frag *last_frag; 701 struct jffs2_node_frag *last_frag;
702 union jffs2_device_node dev; 702 union jffs2_device_node dev;
703 char *mdata = NULL, mdatalen = 0; 703 char *mdata = NULL;
704 int mdatalen = 0;
704 uint32_t alloclen, ilen; 705 uint32_t alloclen, ilen;
705 int ret; 706 int ret;
706 707
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 378991cfe40f..e22de8397b74 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1284,7 +1284,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1284 f->target = NULL; 1284 f->target = NULL;
1285 mutex_unlock(&f->sem); 1285 mutex_unlock(&f->sem);
1286 jffs2_do_clear_inode(c, f); 1286 jffs2_do_clear_inode(c, f);
1287 return -ret; 1287 return ret;
1288 } 1288 }
1289 1289
1290 f->target[je32_to_cpu(latest_node->csize)] = '\0'; 1290 f->target[je32_to_cpu(latest_node->csize)] = '\0';
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 02c39c64ecb3..eaccee058583 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
44} 44}
45 45
46/* ---- XATTR Handler for "security.*" ----------------- */ 46/* ---- XATTR Handler for "security.*" ----------------- */
47static int jffs2_security_getxattr(struct inode *inode, const char *name, 47static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
48 void *buffer, size_t size) 48 void *buffer, size_t size, int type)
49{ 49{
50 if (!strcmp(name, "")) 50 if (!strcmp(name, ""))
51 return -EINVAL; 51 return -EINVAL;
52 52
53 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); 53 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
54 name, buffer, size);
54} 55}
55 56
56static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer, 57static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
57 size_t size, int flags) 58 const void *buffer, size_t size, int flags, int type)
58{ 59{
59 if (!strcmp(name, "")) 60 if (!strcmp(name, ""))
60 return -EINVAL; 61 return -EINVAL;
61 62
62 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags); 63 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
64 name, buffer, size, flags);
63} 65}
64 66
65static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size, 67static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
66 const char *name, size_t name_len) 68 size_t list_size, const char *name, size_t name_len, int type)
67{ 69{
68 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; 70 size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
69 71
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 6caf1e1ee26d..800171dca53b 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,7 +23,7 @@
23 23
24int jffs2_sum_init(struct jffs2_sb_info *c) 24int jffs2_sum_init(struct jffs2_sb_info *c)
25{ 25{
26 uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE); 26 uint32_t sum_size = min_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
27 27
28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
29 29
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4b107881acd5..9e75c62c85d6 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
990 if (!xhandle) 990 if (!xhandle)
991 continue; 991 continue;
992 if (buffer) { 992 if (buffer) {
993 rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len); 993 rc = xhandle->list(dentry, buffer+len, size-len,
994 xd->xname, xd->name_len, xd->flags);
994 } else { 995 } else {
995 rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len); 996 rc = xhandle->list(dentry, NULL, 0, xd->xname,
997 xd->name_len, xd->flags);
996 } 998 }
997 if (rc < 0) 999 if (rc < 0)
998 goto out; 1000 goto out;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 8ec5765ef348..3e5a5e356e05 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_trusted_getxattr(struct inode *inode, const char *name, 19static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8bbeab90ada1..8544af67dffe 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -16,24 +16,26 @@
16#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_user_getxattr(struct inode *inode, const char *name, 19static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size, int type)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
24 return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); 24 return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
25 name, buffer, size);
25} 26}
26 27
27static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer, 28static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
28 size_t size, int flags) 29 const void *buffer, size_t size, int flags, int type)
29{ 30{
30 if (!strcmp(name, "")) 31 if (!strcmp(name, ""))
31 return -EINVAL; 32 return -EINVAL;
32 return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags); 33 return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
34 name, buffer, size, flags);
33} 35}
34 36
35static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size, 37static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
36 const char *name, size_t name_len) 38 size_t list_size, const char *name, size_t name_len, int type)
37{ 39{
38 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; 40 size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
39 41
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada5..d945ea76b445 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1292 */ 1292 */
1293 /* 1293 /*
1294 * I believe this code is no longer needed. Splitting I_LOCK 1294 * I believe this code is no longer needed. Splitting I_LOCK
1295 * into two bits, I_LOCK and I_SYNC should prevent this 1295 * into two bits, I_NEW and I_SYNC should prevent this
1296 * deadlock as well. But since I don't have a JFS testload 1296 * deadlock as well. But since I don't have a JFS testload
1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. 1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1298 * Joern 1298 * Joern
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2234c73fc577..d929a822a74e 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -524,7 +524,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
524 * Page cache is indexed by long. 524 * Page cache is indexed by long.
525 * I would use MAX_LFS_FILESIZE, but it's only half as big 525 * I would use MAX_LFS_FILESIZE, but it's only half as big
526 */ 526 */
527 sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes); 527 sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes);
528#endif 528#endif
529 sb->s_time_gran = 1; 529 sb->s_time_gran = 1;
530 return 0; 530 return 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index 219576c52d80..6e8d17e1dc4c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -848,7 +848,6 @@ EXPORT_SYMBOL(simple_write_end);
848EXPORT_SYMBOL(simple_dir_inode_operations); 848EXPORT_SYMBOL(simple_dir_inode_operations);
849EXPORT_SYMBOL(simple_dir_operations); 849EXPORT_SYMBOL(simple_dir_operations);
850EXPORT_SYMBOL(simple_empty); 850EXPORT_SYMBOL(simple_empty);
851EXPORT_SYMBOL(d_alloc_name);
852EXPORT_SYMBOL(simple_fill_super); 851EXPORT_SYMBOL(simple_fill_super);
853EXPORT_SYMBOL(simple_getattr); 852EXPORT_SYMBOL(simple_getattr);
854EXPORT_SYMBOL(simple_link); 853EXPORT_SYMBOL(simple_link);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bd173a6ca3b1..a7966eed3c17 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index e1d28ddd2169..56c9519d900a 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -11,10 +11,6 @@
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/in.h>
15#include <linux/sunrpc/svc.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/lockd.h> 14#include <linux/lockd/lockd.h>
19#include <linux/lockd/share.h> 15#include <linux/lockd/share.h>
20 16
diff --git a/fs/namei.c b/fs/namei.c
index 87f97ba90ad1..94a5e60779f9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,7 +35,7 @@
35#include <linux/fs_struct.h> 35#include <linux/fs_struct.h>
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37 37
38#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 38#include "internal.h"
39 39
40/* [Feb-1997 T. Schoebel-Theuer] 40/* [Feb-1997 T. Schoebel-Theuer]
41 * Fundamental changes in the pathname lookup mechanisms (namei) 41 * Fundamental changes in the pathname lookup mechanisms (namei)
@@ -108,8 +108,6 @@
108 * any extra contention... 108 * any extra contention...
109 */ 109 */
110 110
111static int __link_path_walk(const char *name, struct nameidata *nd);
112
113/* In order to reduce some races, while at the same time doing additional 111/* In order to reduce some races, while at the same time doing additional
114 * checking and hopefully speeding things up, we copy filenames to the 112 * checking and hopefully speeding things up, we copy filenames to the
115 * kernel data space before using them.. 113 * kernel data space before using them..
@@ -234,6 +232,7 @@ int generic_permission(struct inode *inode, int mask,
234 /* 232 /*
235 * Searching includes executable on directories, else just read. 233 * Searching includes executable on directories, else just read.
236 */ 234 */
235 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
237 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 236 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
238 if (capable(CAP_DAC_READ_SEARCH)) 237 if (capable(CAP_DAC_READ_SEARCH))
239 return 0; 238 return 0;
@@ -414,36 +413,55 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
414} 413}
415 414
416/* 415/*
417 * Internal lookup() using the new generic dcache. 416 * force_reval_path - force revalidation of a dentry
418 * SMP-safe 417 *
418 * In some situations the path walking code will trust dentries without
419 * revalidating them. This causes problems for filesystems that depend on
420 * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
421 * (which indicates that it's possible for the dentry to go stale), force
422 * a d_revalidate call before proceeding.
423 *
424 * Returns 0 if the revalidation was successful. If the revalidation fails,
425 * either return the error returned by d_revalidate or -ESTALE if the
426 * revalidation just returned 0. If d_revalidate returns 0, we attempt to
427 * invalidate the dentry. It's up to the caller to handle putting references
428 * to the path if necessary.
419 */ 429 */
420static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 430static int
431force_reval_path(struct path *path, struct nameidata *nd)
421{ 432{
422 struct dentry * dentry = __d_lookup(parent, name); 433 int status;
434 struct dentry *dentry = path->dentry;
423 435
424 /* lockess __d_lookup may fail due to concurrent d_move() 436 /*
425 * in some unrelated directory, so try with d_lookup 437 * only check on filesystems where it's possible for the dentry to
438 * become stale. It's assumed that if this flag is set then the
439 * d_revalidate op will also be defined.
426 */ 440 */
427 if (!dentry) 441 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
428 dentry = d_lookup(parent, name); 442 return 0;
429 443
430 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 444 status = dentry->d_op->d_revalidate(dentry, nd);
431 dentry = do_revalidate(dentry, nd); 445 if (status > 0)
446 return 0;
432 447
433 return dentry; 448 if (!status) {
449 d_invalidate(dentry);
450 status = -ESTALE;
451 }
452 return status;
434} 453}
435 454
436/* 455/*
437 * Short-cut version of permission(), for calling by 456 * Short-cut version of permission(), for calling on directories
438 * path_walk(), when dcache lock is held. Combines parts 457 * during pathname resolution. Combines parts of permission()
439 * of permission() and generic_permission(), and tests ONLY for 458 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
440 * MAY_EXEC permission.
441 * 459 *
442 * If appropriate, check DAC only. If not appropriate, or 460 * If appropriate, check DAC only. If not appropriate, or
443 * short-cut DAC fails, then call permission() to do more 461 * short-cut DAC fails, then call ->permission() to do more
444 * complete permission check. 462 * complete permission check.
445 */ 463 */
446static int exec_permission_lite(struct inode *inode) 464static int exec_permission(struct inode *inode)
447{ 465{
448 int ret; 466 int ret;
449 467
@@ -465,99 +483,6 @@ ok:
465 return security_inode_permission(inode, MAY_EXEC); 483 return security_inode_permission(inode, MAY_EXEC);
466} 484}
467 485
468/*
469 * This is called when everything else fails, and we actually have
470 * to go to the low-level filesystem to find out what we should do..
471 *
472 * We get the directory semaphore, and after getting that we also
473 * make sure that nobody added the entry to the dcache in the meantime..
474 * SMP-safe
475 */
476static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
477{
478 struct dentry * result;
479 struct inode *dir = parent->d_inode;
480
481 mutex_lock(&dir->i_mutex);
482 /*
483 * First re-do the cached lookup just in case it was created
484 * while we waited for the directory semaphore..
485 *
486 * FIXME! This could use version numbering or similar to
487 * avoid unnecessary cache lookups.
488 *
489 * The "dcache_lock" is purely to protect the RCU list walker
490 * from concurrent renames at this point (we mustn't get false
491 * negatives from the RCU list walk here, unlike the optimistic
492 * fast walk).
493 *
494 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
495 */
496 result = d_lookup(parent, name);
497 if (!result) {
498 struct dentry *dentry;
499
500 /* Don't create child dentry for a dead directory. */
501 result = ERR_PTR(-ENOENT);
502 if (IS_DEADDIR(dir))
503 goto out_unlock;
504
505 dentry = d_alloc(parent, name);
506 result = ERR_PTR(-ENOMEM);
507 if (dentry) {
508 result = dir->i_op->lookup(dir, dentry, nd);
509 if (result)
510 dput(dentry);
511 else
512 result = dentry;
513 }
514out_unlock:
515 mutex_unlock(&dir->i_mutex);
516 return result;
517 }
518
519 /*
520 * Uhhuh! Nasty case: the cache was re-populated while
521 * we waited on the semaphore. Need to revalidate.
522 */
523 mutex_unlock(&dir->i_mutex);
524 if (result->d_op && result->d_op->d_revalidate) {
525 result = do_revalidate(result, nd);
526 if (!result)
527 result = ERR_PTR(-ENOENT);
528 }
529 return result;
530}
531
532/*
533 * Wrapper to retry pathname resolution whenever the underlying
534 * file system returns an ESTALE.
535 *
536 * Retry the whole path once, forcing real lookup requests
537 * instead of relying on the dcache.
538 */
539static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
540{
541 struct path save = nd->path;
542 int result;
543
544 /* make sure the stuff we saved doesn't go away */
545 path_get(&save);
546
547 result = __link_path_walk(name, nd);
548 if (result == -ESTALE) {
549 /* nd->path had been dropped */
550 nd->path = save;
551 path_get(&nd->path);
552 nd->flags |= LOOKUP_REVAL;
553 result = __link_path_walk(name, nd);
554 }
555
556 path_put(&save);
557
558 return result;
559}
560
561static __always_inline void set_root(struct nameidata *nd) 486static __always_inline void set_root(struct nameidata *nd)
562{ 487{
563 if (!nd->root.mnt) { 488 if (!nd->root.mnt) {
@@ -569,6 +494,8 @@ static __always_inline void set_root(struct nameidata *nd)
569 } 494 }
570} 495}
571 496
497static int link_path_walk(const char *, struct nameidata *);
498
572static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 499static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
573{ 500{
574 int res = 0; 501 int res = 0;
@@ -634,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
634 dget(dentry); 561 dget(dentry);
635 } 562 }
636 mntget(path->mnt); 563 mntget(path->mnt);
564 nd->last_type = LAST_BIND;
637 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
638 error = PTR_ERR(cookie); 566 error = PTR_ERR(cookie);
639 if (!IS_ERR(cookie)) { 567 if (!IS_ERR(cookie)) {
@@ -641,11 +569,14 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
641 error = 0; 569 error = 0;
642 if (s) 570 if (s)
643 error = __vfs_follow_link(nd, s); 571 error = __vfs_follow_link(nd, s);
572 else if (nd->last_type == LAST_BIND) {
573 error = force_reval_path(&nd->path, nd);
574 if (error)
575 path_put(&nd->path);
576 }
644 if (dentry->d_inode->i_op->put_link) 577 if (dentry->d_inode->i_op->put_link)
645 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 578 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
646 } 579 }
647 path_put(path);
648
649 return error; 580 return error;
650} 581}
651 582
@@ -672,6 +603,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
672 current->total_link_count++; 603 current->total_link_count++;
673 nd->depth++; 604 nd->depth++;
674 err = __do_follow_link(path, nd); 605 err = __do_follow_link(path, nd);
606 path_put(path);
675 current->link_count--; 607 current->link_count--;
676 nd->depth--; 608 nd->depth--;
677 return err; 609 return err;
@@ -797,8 +729,19 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
797 struct path *path) 729 struct path *path)
798{ 730{
799 struct vfsmount *mnt = nd->path.mnt; 731 struct vfsmount *mnt = nd->path.mnt;
800 struct dentry *dentry = __d_lookup(nd->path.dentry, name); 732 struct dentry *dentry, *parent;
733 struct inode *dir;
734 /*
735 * See if the low-level filesystem might want
736 * to use its own hash..
737 */
738 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
739 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
740 if (err < 0)
741 return err;
742 }
801 743
744 dentry = __d_lookup(nd->path.dentry, name);
802 if (!dentry) 745 if (!dentry)
803 goto need_lookup; 746 goto need_lookup;
804 if (dentry->d_op && dentry->d_op->d_revalidate) 747 if (dentry->d_op && dentry->d_op->d_revalidate)
@@ -810,7 +753,59 @@ done:
810 return 0; 753 return 0;
811 754
812need_lookup: 755need_lookup:
813 dentry = real_lookup(nd->path.dentry, name, nd); 756 parent = nd->path.dentry;
757 dir = parent->d_inode;
758
759 mutex_lock(&dir->i_mutex);
760 /*
761 * First re-do the cached lookup just in case it was created
762 * while we waited for the directory semaphore..
763 *
764 * FIXME! This could use version numbering or similar to
765 * avoid unnecessary cache lookups.
766 *
767 * The "dcache_lock" is purely to protect the RCU list walker
768 * from concurrent renames at this point (we mustn't get false
769 * negatives from the RCU list walk here, unlike the optimistic
770 * fast walk).
771 *
772 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
773 */
774 dentry = d_lookup(parent, name);
775 if (!dentry) {
776 struct dentry *new;
777
778 /* Don't create child dentry for a dead directory. */
779 dentry = ERR_PTR(-ENOENT);
780 if (IS_DEADDIR(dir))
781 goto out_unlock;
782
783 new = d_alloc(parent, name);
784 dentry = ERR_PTR(-ENOMEM);
785 if (new) {
786 dentry = dir->i_op->lookup(dir, new, nd);
787 if (dentry)
788 dput(new);
789 else
790 dentry = new;
791 }
792out_unlock:
793 mutex_unlock(&dir->i_mutex);
794 if (IS_ERR(dentry))
795 goto fail;
796 goto done;
797 }
798
799 /*
800 * Uhhuh! Nasty case: the cache was re-populated while
801 * we waited on the semaphore. Need to revalidate.
802 */
803 mutex_unlock(&dir->i_mutex);
804 if (dentry->d_op && dentry->d_op->d_revalidate) {
805 dentry = do_revalidate(dentry, nd);
806 if (!dentry)
807 dentry = ERR_PTR(-ENOENT);
808 }
814 if (IS_ERR(dentry)) 809 if (IS_ERR(dentry))
815 goto fail; 810 goto fail;
816 goto done; 811 goto done;
@@ -835,7 +830,7 @@ fail:
835 * Returns 0 and nd will have valid dentry and mnt on success. 830 * Returns 0 and nd will have valid dentry and mnt on success.
836 * Returns error and drops reference to input namei data on failure. 831 * Returns error and drops reference to input namei data on failure.
837 */ 832 */
838static int __link_path_walk(const char *name, struct nameidata *nd) 833static int link_path_walk(const char *name, struct nameidata *nd)
839{ 834{
840 struct path next; 835 struct path next;
841 struct inode *inode; 836 struct inode *inode;
@@ -858,7 +853,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
858 unsigned int c; 853 unsigned int c;
859 854
860 nd->flags |= LOOKUP_CONTINUE; 855 nd->flags |= LOOKUP_CONTINUE;
861 err = exec_permission_lite(inode); 856 err = exec_permission(inode);
862 if (err) 857 if (err)
863 break; 858 break;
864 859
@@ -898,16 +893,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
898 case 1: 893 case 1:
899 continue; 894 continue;
900 } 895 }
901 /*
902 * See if the low-level filesystem might want
903 * to use its own hash..
904 */
905 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
906 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
907 &this);
908 if (err < 0)
909 break;
910 }
911 /* This does the actual lookups.. */ 896 /* This does the actual lookups.. */
912 err = do_lookup(nd, &this, &next); 897 err = do_lookup(nd, &this, &next);
913 if (err) 898 if (err)
@@ -953,12 +938,6 @@ last_component:
953 case 1: 938 case 1:
954 goto return_reval; 939 goto return_reval;
955 } 940 }
956 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
957 err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
958 &this);
959 if (err < 0)
960 break;
961 }
962 err = do_lookup(nd, &this, &next); 941 err = do_lookup(nd, &this, &next);
963 if (err) 942 if (err)
964 break; 943 break;
@@ -1017,8 +996,27 @@ return_err:
1017 996
1018static int path_walk(const char *name, struct nameidata *nd) 997static int path_walk(const char *name, struct nameidata *nd)
1019{ 998{
999 struct path save = nd->path;
1000 int result;
1001
1020 current->total_link_count = 0; 1002 current->total_link_count = 0;
1021 return link_path_walk(name, nd); 1003
1004 /* make sure the stuff we saved doesn't go away */
1005 path_get(&save);
1006
1007 result = link_path_walk(name, nd);
1008 if (result == -ESTALE) {
1009 /* nd->path had been dropped */
1010 current->total_link_count = 0;
1011 nd->path = save;
1012 path_get(&nd->path);
1013 nd->flags |= LOOKUP_REVAL;
1014 result = link_path_walk(name, nd);
1015 }
1016
1017 path_put(&save);
1018
1019 return result;
1022} 1020}
1023 1021
1024static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1022static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
@@ -1141,36 +1139,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1141 return retval; 1139 return retval;
1142} 1140}
1143 1141
1144/**
1145 * path_lookup_open - lookup a file path with open intent
1146 * @dfd: the directory to use as base, or AT_FDCWD
1147 * @name: pointer to file name
1148 * @lookup_flags: lookup intent flags
1149 * @nd: pointer to nameidata
1150 * @open_flags: open intent flags
1151 */
1152static int path_lookup_open(int dfd, const char *name,
1153 unsigned int lookup_flags, struct nameidata *nd, int open_flags)
1154{
1155 struct file *filp = get_empty_filp();
1156 int err;
1157
1158 if (filp == NULL)
1159 return -ENFILE;
1160 nd->intent.open.file = filp;
1161 nd->intent.open.flags = open_flags;
1162 nd->intent.open.create_mode = 0;
1163 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1164 if (IS_ERR(nd->intent.open.file)) {
1165 if (err == 0) {
1166 err = PTR_ERR(nd->intent.open.file);
1167 path_put(&nd->path);
1168 }
1169 } else if (err != 0)
1170 release_open_intent(nd);
1171 return err;
1172}
1173
1174static struct dentry *__lookup_hash(struct qstr *name, 1142static struct dentry *__lookup_hash(struct qstr *name,
1175 struct dentry *base, struct nameidata *nd) 1143 struct dentry *base, struct nameidata *nd)
1176{ 1144{
@@ -1191,7 +1159,17 @@ static struct dentry *__lookup_hash(struct qstr *name,
1191 goto out; 1159 goto out;
1192 } 1160 }
1193 1161
1194 dentry = cached_lookup(base, name, nd); 1162 dentry = __d_lookup(base, name);
1163
1164 /* lockless __d_lookup may fail due to concurrent d_move()
1165 * in some unrelated directory, so try with d_lookup
1166 */
1167 if (!dentry)
1168 dentry = d_lookup(base, name);
1169
1170 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
1171 dentry = do_revalidate(dentry, nd);
1172
1195 if (!dentry) { 1173 if (!dentry) {
1196 struct dentry *new; 1174 struct dentry *new;
1197 1175
@@ -1223,7 +1201,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1223{ 1201{
1224 int err; 1202 int err;
1225 1203
1226 err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); 1204 err = exec_permission(nd->path.dentry->d_inode);
1227 if (err) 1205 if (err)
1228 return ERR_PTR(err); 1206 return ERR_PTR(err);
1229 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1207 return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1273,7 +1251,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1273 if (err) 1251 if (err)
1274 return ERR_PTR(err); 1252 return ERR_PTR(err);
1275 1253
1276 err = inode_permission(base->d_inode, MAY_EXEC); 1254 err = exec_permission(base->d_inode);
1277 if (err) 1255 if (err)
1278 return ERR_PTR(err); 1256 return ERR_PTR(err);
1279 return __lookup_hash(&this, base, NULL); 1257 return __lookup_hash(&this, base, NULL);
@@ -1511,69 +1489,45 @@ int may_open(struct path *path, int acc_mode, int flag)
1511 if (error) 1489 if (error)
1512 return error; 1490 return error;
1513 1491
1514 error = ima_path_check(path, acc_mode ?
1515 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1516 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1517 IMA_COUNT_UPDATE);
1518
1519 if (error)
1520 return error;
1521 /* 1492 /*
1522 * An append-only file must be opened in append mode for writing. 1493 * An append-only file must be opened in append mode for writing.
1523 */ 1494 */
1524 if (IS_APPEND(inode)) { 1495 if (IS_APPEND(inode)) {
1525 error = -EPERM;
1526 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1496 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1527 goto err_out; 1497 return -EPERM;
1528 if (flag & O_TRUNC) 1498 if (flag & O_TRUNC)
1529 goto err_out; 1499 return -EPERM;
1530 } 1500 }
1531 1501
1532 /* O_NOATIME can only be set by the owner or superuser */ 1502 /* O_NOATIME can only be set by the owner or superuser */
1533 if (flag & O_NOATIME) 1503 if (flag & O_NOATIME && !is_owner_or_cap(inode))
1534 if (!is_owner_or_cap(inode)) { 1504 return -EPERM;
1535 error = -EPERM;
1536 goto err_out;
1537 }
1538 1505
1539 /* 1506 /*
1540 * Ensure there are no outstanding leases on the file. 1507 * Ensure there are no outstanding leases on the file.
1541 */ 1508 */
1542 error = break_lease(inode, flag); 1509 return break_lease(inode, flag);
1543 if (error) 1510}
1544 goto err_out;
1545
1546 if (flag & O_TRUNC) {
1547 error = get_write_access(inode);
1548 if (error)
1549 goto err_out;
1550
1551 /*
1552 * Refuse to truncate files with mandatory locks held on them.
1553 */
1554 error = locks_verify_locked(inode);
1555 if (!error)
1556 error = security_path_truncate(path, 0,
1557 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1558 if (!error) {
1559 vfs_dq_init(inode);
1560
1561 error = do_truncate(dentry, 0,
1562 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1563 NULL);
1564 }
1565 put_write_access(inode);
1566 if (error)
1567 goto err_out;
1568 } else
1569 if (flag & FMODE_WRITE)
1570 vfs_dq_init(inode);
1571 1511
1572 return 0; 1512static int handle_truncate(struct path *path)
1573err_out: 1513{
1574 ima_counts_put(path, acc_mode ? 1514 struct inode *inode = path->dentry->d_inode;
1575 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : 1515 int error = get_write_access(inode);
1576 ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); 1516 if (error)
1517 return error;
1518 /*
1519 * Refuse to truncate files with mandatory locks held on them.
1520 */
1521 error = locks_verify_locked(inode);
1522 if (!error)
1523 error = security_path_truncate(path, 0,
1524 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
1525 if (!error) {
1526 error = do_truncate(path->dentry, 0,
1527 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1528 NULL);
1529 }
1530 put_write_access(inode);
1577 return error; 1531 return error;
1578} 1532}
1579 1533
@@ -1628,7 +1582,7 @@ static inline int open_to_namei_flags(int flag)
1628 return flag; 1582 return flag;
1629} 1583}
1630 1584
1631static int open_will_write_to_fs(int flag, struct inode *inode) 1585static int open_will_truncate(int flag, struct inode *inode)
1632{ 1586{
1633 /* 1587 /*
1634 * We'll never write to the fs underlying 1588 * We'll never write to the fs underlying
@@ -1653,8 +1607,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
1653 struct path path; 1607 struct path path;
1654 struct dentry *dir; 1608 struct dentry *dir;
1655 int count = 0; 1609 int count = 0;
1656 int will_write; 1610 int will_truncate;
1657 int flag = open_to_namei_flags(open_flag); 1611 int flag = open_to_namei_flags(open_flag);
1612 int force_reval = 0;
1658 1613
1659 /* 1614 /*
1660 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1615 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1666,7 +1621,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1666 open_flag |= O_DSYNC; 1621 open_flag |= O_DSYNC;
1667 1622
1668 if (!acc_mode) 1623 if (!acc_mode)
1669 acc_mode = MAY_OPEN | ACC_MODE(flag); 1624 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1670 1625
1671 /* O_TRUNC implies we need access checks for write permissions */ 1626 /* O_TRUNC implies we need access checks for write permissions */
1672 if (flag & O_TRUNC) 1627 if (flag & O_TRUNC)
@@ -1681,8 +1636,23 @@ struct file *do_filp_open(int dfd, const char *pathname,
1681 * The simplest case - just a plain lookup. 1636 * The simplest case - just a plain lookup.
1682 */ 1637 */
1683 if (!(flag & O_CREAT)) { 1638 if (!(flag & O_CREAT)) {
1684 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1639 filp = get_empty_filp();
1685 &nd, flag); 1640
1641 if (filp == NULL)
1642 return ERR_PTR(-ENFILE);
1643 nd.intent.open.file = filp;
1644 filp->f_flags = open_flag;
1645 nd.intent.open.flags = flag;
1646 nd.intent.open.create_mode = 0;
1647 error = do_path_lookup(dfd, pathname,
1648 lookup_flags(flag)|LOOKUP_OPEN, &nd);
1649 if (IS_ERR(nd.intent.open.file)) {
1650 if (error == 0) {
1651 error = PTR_ERR(nd.intent.open.file);
1652 path_put(&nd.path);
1653 }
1654 } else if (error)
1655 release_open_intent(&nd);
1686 if (error) 1656 if (error)
1687 return ERR_PTR(error); 1657 return ERR_PTR(error);
1688 goto ok; 1658 goto ok;
@@ -1691,9 +1661,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1691 /* 1661 /*
1692 * Create - we need to know the parent. 1662 * Create - we need to know the parent.
1693 */ 1663 */
1664reval:
1694 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1665 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1695 if (error) 1666 if (error)
1696 return ERR_PTR(error); 1667 return ERR_PTR(error);
1668 if (force_reval)
1669 nd.flags |= LOOKUP_REVAL;
1697 error = path_walk(pathname, &nd); 1670 error = path_walk(pathname, &nd);
1698 if (error) { 1671 if (error) {
1699 if (nd.root.mnt) 1672 if (nd.root.mnt)
@@ -1717,6 +1690,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1717 if (filp == NULL) 1690 if (filp == NULL)
1718 goto exit_parent; 1691 goto exit_parent;
1719 nd.intent.open.file = filp; 1692 nd.intent.open.file = filp;
1693 filp->f_flags = open_flag;
1720 nd.intent.open.flags = flag; 1694 nd.intent.open.flags = flag;
1721 nd.intent.open.create_mode = mode; 1695 nd.intent.open.create_mode = mode;
1722 dir = nd.path.dentry; 1696 dir = nd.path.dentry;
@@ -1757,14 +1731,18 @@ do_last:
1757 mnt_drop_write(nd.path.mnt); 1731 mnt_drop_write(nd.path.mnt);
1758 goto exit; 1732 goto exit;
1759 } 1733 }
1760 filp = nameidata_to_filp(&nd, open_flag); 1734 filp = nameidata_to_filp(&nd);
1761 if (IS_ERR(filp))
1762 ima_counts_put(&nd.path,
1763 acc_mode & (MAY_READ | MAY_WRITE |
1764 MAY_EXEC));
1765 mnt_drop_write(nd.path.mnt); 1735 mnt_drop_write(nd.path.mnt);
1766 if (nd.root.mnt) 1736 if (nd.root.mnt)
1767 path_put(&nd.root); 1737 path_put(&nd.root);
1738 if (!IS_ERR(filp)) {
1739 error = ima_path_check(&filp->f_path, filp->f_mode &
1740 (MAY_READ | MAY_WRITE | MAY_EXEC));
1741 if (error) {
1742 fput(filp);
1743 filp = ERR_PTR(error);
1744 }
1745 }
1768 return filp; 1746 return filp;
1769 } 1747 }
1770 1748
@@ -1792,7 +1770,7 @@ do_last:
1792 1770
1793 path_to_nameidata(&path, &nd); 1771 path_to_nameidata(&path, &nd);
1794 error = -EISDIR; 1772 error = -EISDIR;
1795 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1773 if (S_ISDIR(path.dentry->d_inode->i_mode))
1796 goto exit; 1774 goto exit;
1797ok: 1775ok:
1798 /* 1776 /*
@@ -1805,28 +1783,45 @@ ok:
1805 * be avoided. Taking this mnt write here 1783 * be avoided. Taking this mnt write here
1806 * ensures that (2) can not occur. 1784 * ensures that (2) can not occur.
1807 */ 1785 */
1808 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); 1786 will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
1809 if (will_write) { 1787 if (will_truncate) {
1810 error = mnt_want_write(nd.path.mnt); 1788 error = mnt_want_write(nd.path.mnt);
1811 if (error) 1789 if (error)
1812 goto exit; 1790 goto exit;
1813 } 1791 }
1814 error = may_open(&nd.path, acc_mode, flag); 1792 error = may_open(&nd.path, acc_mode, flag);
1815 if (error) { 1793 if (error) {
1816 if (will_write) 1794 if (will_truncate)
1817 mnt_drop_write(nd.path.mnt); 1795 mnt_drop_write(nd.path.mnt);
1818 goto exit; 1796 goto exit;
1819 } 1797 }
1820 filp = nameidata_to_filp(&nd, open_flag); 1798 filp = nameidata_to_filp(&nd);
1821 if (IS_ERR(filp)) 1799 if (!IS_ERR(filp)) {
1822 ima_counts_put(&nd.path, 1800 error = ima_path_check(&filp->f_path, filp->f_mode &
1823 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); 1801 (MAY_READ | MAY_WRITE | MAY_EXEC));
1802 if (error) {
1803 fput(filp);
1804 filp = ERR_PTR(error);
1805 }
1806 }
1807 if (!IS_ERR(filp)) {
1808 if (acc_mode & MAY_WRITE)
1809 vfs_dq_init(nd.path.dentry->d_inode);
1810
1811 if (will_truncate) {
1812 error = handle_truncate(&nd.path);
1813 if (error) {
1814 fput(filp);
1815 filp = ERR_PTR(error);
1816 }
1817 }
1818 }
1824 /* 1819 /*
1825 * It is now safe to drop the mnt write 1820 * It is now safe to drop the mnt write
1826 * because the filp has had a write taken 1821 * because the filp has had a write taken
1827 * on its behalf. 1822 * on its behalf.
1828 */ 1823 */
1829 if (will_write) 1824 if (will_truncate)
1830 mnt_drop_write(nd.path.mnt); 1825 mnt_drop_write(nd.path.mnt);
1831 if (nd.root.mnt) 1826 if (nd.root.mnt)
1832 path_put(&nd.root); 1827 path_put(&nd.root);
@@ -1864,6 +1859,7 @@ do_link:
1864 if (error) 1859 if (error)
1865 goto exit_dput; 1860 goto exit_dput;
1866 error = __do_follow_link(&path, &nd); 1861 error = __do_follow_link(&path, &nd);
1862 path_put(&path);
1867 if (error) { 1863 if (error) {
1868 /* Does someone understand code flow here? Or it is only 1864 /* Does someone understand code flow here? Or it is only
1869 * me so stupid? Anathema to whoever designed this non-sense 1865 * me so stupid? Anathema to whoever designed this non-sense
@@ -1872,6 +1868,10 @@ do_link:
1872 release_open_intent(&nd); 1868 release_open_intent(&nd);
1873 if (nd.root.mnt) 1869 if (nd.root.mnt)
1874 path_put(&nd.root); 1870 path_put(&nd.root);
1871 if (error == -ESTALE && !force_reval) {
1872 force_reval = 1;
1873 goto reval;
1874 }
1875 return ERR_PTR(error); 1875 return ERR_PTR(error);
1876 } 1876 }
1877 nd.flags &= ~LOOKUP_PARENT; 1877 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..c768f733c8d6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
965int may_umount(struct vfsmount *mnt) 965int may_umount(struct vfsmount *mnt)
966{ 966{
967 int ret = 1; 967 int ret = 1;
968 down_read(&namespace_sem);
968 spin_lock(&vfsmount_lock); 969 spin_lock(&vfsmount_lock);
969 if (propagate_mount_busy(mnt, 2)) 970 if (propagate_mount_busy(mnt, 2))
970 ret = 0; 971 ret = 0;
971 spin_unlock(&vfsmount_lock); 972 spin_unlock(&vfsmount_lock);
973 up_read(&namespace_sem);
972 return ret; 974 return ret;
973} 975}
974 976
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1352 if (err) 1354 if (err)
1353 goto out_cleanup_ids; 1355 goto out_cleanup_ids;
1354 1356
1357 spin_lock(&vfsmount_lock);
1358
1355 if (IS_MNT_SHARED(dest_mnt)) { 1359 if (IS_MNT_SHARED(dest_mnt)) {
1356 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1360 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1357 set_mnt_shared(p); 1361 set_mnt_shared(p);
1358 } 1362 }
1359
1360 spin_lock(&vfsmount_lock);
1361 if (parent_path) { 1363 if (parent_path) {
1362 detach_mnt(source_mnt, parent_path); 1364 detach_mnt(source_mnt, parent_path);
1363 attach_mnt(source_mnt, path); 1365 attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1534 err = change_mount_flags(path->mnt, flags); 1536 err = change_mount_flags(path->mnt, flags);
1535 else 1537 else
1536 err = do_remount_sb(sb, flags, data, 0); 1538 err = do_remount_sb(sb, flags, data, 0);
1537 if (!err) 1539 if (!err) {
1540 spin_lock(&vfsmount_lock);
1541 mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
1538 path->mnt->mnt_flags = mnt_flags; 1542 path->mnt->mnt_flags = mnt_flags;
1543 spin_unlock(&vfsmount_lock);
1544 }
1539 up_write(&sb->s_umount); 1545 up_write(&sb->s_umount);
1540 if (!err) { 1546 if (!err) {
1541 security_sb_post_remount(path->mnt, flags, data); 1547 security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1665{ 1671{
1666 int err; 1672 int err;
1667 1673
1674 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
1675
1668 down_write(&namespace_sem); 1676 down_write(&namespace_sem);
1669 /* Something was mounted here while we slept */ 1677 /* Something was mounted here while we slept */
1670 while (d_mountpoint(path->dentry) && 1678 while (d_mountpoint(path->dentry) &&
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a77bc25d5af..59e5673b4597 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config ROOT_NFS
90 If you want your system to mount its root file system via NFS, 90 If you want your system to mount its root file system via NFS,
91 choose Y here. This is common practice for managing systems 91 choose Y here. This is common practice for managing systems
92 without local permanent storage. For details, read 92 without local permanent storage. For details, read
93 <file:Documentation/filesystems/nfsroot.txt>. 93 <file:Documentation/filesystems/nfs/nfsroot.txt>.
94 94
95 Most people say N here. 95 Most people say N here.
96 96
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c5ace4f00a7..3c7f03b669fb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1615,6 +1615,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1615 goto out; 1615 goto out;
1616 1616
1617 new_dentry = dentry; 1617 new_dentry = dentry;
1618 rehash = NULL;
1618 new_inode = NULL; 1619 new_inode = NULL;
1619 } 1620 }
1620 } 1621 }
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7e57b04e4014..865265bdca03 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -108,6 +108,10 @@ enum {
108 NFS_OWNER_RECLAIM_NOGRACE 108 NFS_OWNER_RECLAIM_NOGRACE
109}; 109};
110 110
111#define NFS_LOCK_NEW 0
112#define NFS_LOCK_RECLAIM 1
113#define NFS_LOCK_EXPIRED 2
114
111/* 115/*
112 * struct nfs4_state maintains the client-side state for a given 116 * struct nfs4_state maintains the client-side state for a given
113 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). 117 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
@@ -282,6 +286,7 @@ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
282extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 286extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
283extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); 287extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
284extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 288extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
289extern void nfs_release_seqid(struct nfs_seqid *seqid);
285extern void nfs_free_seqid(struct nfs_seqid *seqid); 290extern void nfs_free_seqid(struct nfs_seqid *seqid);
286 291
287extern const nfs4_stateid zero_stateid; 292extern const nfs4_stateid zero_stateid;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9f5f11ecfd93..198d51d17c13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
64 64
65struct nfs4_opendata; 65struct nfs4_opendata;
66static int _nfs4_proc_open(struct nfs4_opendata *data); 66static int _nfs4_proc_open(struct nfs4_opendata *data);
67static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
67static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 68static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
68static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 69static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
69static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 70static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
@@ -341,6 +342,27 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
341 free_slotid, tbl->highest_used_slotid); 342 free_slotid, tbl->highest_used_slotid);
342} 343}
343 344
345/*
346 * Signal state manager thread if session is drained
347 */
348static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
349{
350 struct rpc_task *task;
351
352 if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
353 task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
354 if (task)
355 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
356 return;
357 }
358
359 if (ses->fc_slot_table.highest_used_slotid != -1)
360 return;
361
362 dprintk("%s COMPLETE: Session Drained\n", __func__);
363 complete(&ses->complete);
364}
365
344static void nfs41_sequence_free_slot(const struct nfs_client *clp, 366static void nfs41_sequence_free_slot(const struct nfs_client *clp,
345 struct nfs4_sequence_res *res) 367 struct nfs4_sequence_res *res)
346{ 368{
@@ -356,15 +378,7 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp,
356 378
357 spin_lock(&tbl->slot_tbl_lock); 379 spin_lock(&tbl->slot_tbl_lock);
358 nfs4_free_slot(tbl, res->sr_slotid); 380 nfs4_free_slot(tbl, res->sr_slotid);
359 381 nfs41_check_drain_session_complete(clp->cl_session);
360 /* Signal state manager thread if session is drained */
361 if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
362 if (tbl->highest_used_slotid == -1) {
363 dprintk("%s COMPLETE: Session Drained\n", __func__);
364 complete(&clp->cl_session->complete);
365 }
366 } else
367 rpc_wake_up_next(&tbl->slot_tbl_waitq);
368 spin_unlock(&tbl->slot_tbl_lock); 382 spin_unlock(&tbl->slot_tbl_lock);
369 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 383 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
370} 384}
@@ -421,7 +435,7 @@ out:
421 * Note: must be called with under the slot_tbl_lock. 435 * Note: must be called with under the slot_tbl_lock.
422 */ 436 */
423static u8 437static u8
424nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task) 438nfs4_find_slot(struct nfs4_slot_table *tbl)
425{ 439{
426 int slotid; 440 int slotid;
427 u8 ret_id = NFS4_MAX_SLOT_TABLE; 441 u8 ret_id = NFS4_MAX_SLOT_TABLE;
@@ -463,7 +477,8 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
463 tbl = &session->fc_slot_table; 477 tbl = &session->fc_slot_table;
464 478
465 spin_lock(&tbl->slot_tbl_lock); 479 spin_lock(&tbl->slot_tbl_lock);
466 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state)) { 480 if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
481 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
467 /* 482 /*
468 * The state manager will wait until the slot table is empty. 483 * The state manager will wait until the slot table is empty.
469 * Schedule the reset thread 484 * Schedule the reset thread
@@ -474,7 +489,15 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
474 return -EAGAIN; 489 return -EAGAIN;
475 } 490 }
476 491
477 slotid = nfs4_find_slot(tbl, task); 492 if (!rpc_queue_empty(&tbl->slot_tbl_waitq) &&
493 !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
494 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
495 spin_unlock(&tbl->slot_tbl_lock);
496 dprintk("%s enforce FIFO order\n", __func__);
497 return -EAGAIN;
498 }
499
500 slotid = nfs4_find_slot(tbl);
478 if (slotid == NFS4_MAX_SLOT_TABLE) { 501 if (slotid == NFS4_MAX_SLOT_TABLE) {
479 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); 502 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
480 spin_unlock(&tbl->slot_tbl_lock); 503 spin_unlock(&tbl->slot_tbl_lock);
@@ -483,6 +506,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
483 } 506 }
484 spin_unlock(&tbl->slot_tbl_lock); 507 spin_unlock(&tbl->slot_tbl_lock);
485 508
509 rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
486 slot = tbl->slots + slotid; 510 slot = tbl->slots + slotid;
487 args->sa_session = session; 511 args->sa_session = session;
488 args->sa_slotid = slotid; 512 args->sa_slotid = slotid;
@@ -545,6 +569,12 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
545 rpc_call_start(task); 569 rpc_call_start(task);
546} 570}
547 571
572static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
573{
574 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
575 nfs41_call_sync_prepare(task, calldata);
576}
577
548static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) 578static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
549{ 579{
550 struct nfs41_call_sync_data *data = calldata; 580 struct nfs41_call_sync_data *data = calldata;
@@ -557,12 +587,18 @@ struct rpc_call_ops nfs41_call_sync_ops = {
557 .rpc_call_done = nfs41_call_sync_done, 587 .rpc_call_done = nfs41_call_sync_done,
558}; 588};
559 589
590struct rpc_call_ops nfs41_call_priv_sync_ops = {
591 .rpc_call_prepare = nfs41_call_priv_sync_prepare,
592 .rpc_call_done = nfs41_call_sync_done,
593};
594
560static int nfs4_call_sync_sequence(struct nfs_client *clp, 595static int nfs4_call_sync_sequence(struct nfs_client *clp,
561 struct rpc_clnt *clnt, 596 struct rpc_clnt *clnt,
562 struct rpc_message *msg, 597 struct rpc_message *msg,
563 struct nfs4_sequence_args *args, 598 struct nfs4_sequence_args *args,
564 struct nfs4_sequence_res *res, 599 struct nfs4_sequence_res *res,
565 int cache_reply) 600 int cache_reply,
601 int privileged)
566{ 602{
567 int ret; 603 int ret;
568 struct rpc_task *task; 604 struct rpc_task *task;
@@ -580,6 +616,8 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp,
580 }; 616 };
581 617
582 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 618 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
619 if (privileged)
620 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
583 task = rpc_run_task(&task_setup); 621 task = rpc_run_task(&task_setup);
584 if (IS_ERR(task)) 622 if (IS_ERR(task))
585 ret = PTR_ERR(task); 623 ret = PTR_ERR(task);
@@ -597,7 +635,7 @@ int _nfs4_call_sync_session(struct nfs_server *server,
597 int cache_reply) 635 int cache_reply)
598{ 636{
599 return nfs4_call_sync_sequence(server->nfs_client, server->client, 637 return nfs4_call_sync_sequence(server->nfs_client, server->client,
600 msg, args, res, cache_reply); 638 msg, args, res, cache_reply, 0);
601} 639}
602 640
603#endif /* CONFIG_NFS_V4_1 */ 641#endif /* CONFIG_NFS_V4_1 */
@@ -1035,7 +1073,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1035 memset(&opendata->o_res, 0, sizeof(opendata->o_res)); 1073 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
1036 memset(&opendata->c_res, 0, sizeof(opendata->c_res)); 1074 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
1037 nfs4_init_opendata_res(opendata); 1075 nfs4_init_opendata_res(opendata);
1038 ret = _nfs4_proc_open(opendata); 1076 ret = _nfs4_recover_proc_open(opendata);
1039 if (ret != 0) 1077 if (ret != 0)
1040 return ret; 1078 return ret;
1041 newstate = nfs4_opendata_to_nfs4_state(opendata); 1079 newstate = nfs4_opendata_to_nfs4_state(opendata);
@@ -1326,6 +1364,12 @@ out_no_action:
1326 1364
1327} 1365}
1328 1366
1367static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
1368{
1369 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
1370 nfs4_open_prepare(task, calldata);
1371}
1372
1329static void nfs4_open_done(struct rpc_task *task, void *calldata) 1373static void nfs4_open_done(struct rpc_task *task, void *calldata)
1330{ 1374{
1331 struct nfs4_opendata *data = calldata; 1375 struct nfs4_opendata *data = calldata;
@@ -1384,10 +1428,13 @@ static const struct rpc_call_ops nfs4_open_ops = {
1384 .rpc_release = nfs4_open_release, 1428 .rpc_release = nfs4_open_release,
1385}; 1429};
1386 1430
1387/* 1431static const struct rpc_call_ops nfs4_recover_open_ops = {
1388 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata 1432 .rpc_call_prepare = nfs4_recover_open_prepare,
1389 */ 1433 .rpc_call_done = nfs4_open_done,
1390static int _nfs4_proc_open(struct nfs4_opendata *data) 1434 .rpc_release = nfs4_open_release,
1435};
1436
1437static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
1391{ 1438{
1392 struct inode *dir = data->dir->d_inode; 1439 struct inode *dir = data->dir->d_inode;
1393 struct nfs_server *server = NFS_SERVER(dir); 1440 struct nfs_server *server = NFS_SERVER(dir);
@@ -1414,21 +1461,57 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1414 data->rpc_done = 0; 1461 data->rpc_done = 0;
1415 data->rpc_status = 0; 1462 data->rpc_status = 0;
1416 data->cancelled = 0; 1463 data->cancelled = 0;
1464 if (isrecover)
1465 task_setup_data.callback_ops = &nfs4_recover_open_ops;
1417 task = rpc_run_task(&task_setup_data); 1466 task = rpc_run_task(&task_setup_data);
1418 if (IS_ERR(task)) 1467 if (IS_ERR(task))
1419 return PTR_ERR(task); 1468 return PTR_ERR(task);
1420 status = nfs4_wait_for_completion_rpc_task(task); 1469 status = nfs4_wait_for_completion_rpc_task(task);
1421 if (status != 0) { 1470 if (status != 0) {
1422 data->cancelled = 1; 1471 data->cancelled = 1;
1423 smp_wmb(); 1472 smp_wmb();
1424 } else 1473 } else
1425 status = data->rpc_status; 1474 status = data->rpc_status;
1426 rpc_put_task(task); 1475 rpc_put_task(task);
1476
1477 return status;
1478}
1479
1480static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1481{
1482 struct inode *dir = data->dir->d_inode;
1483 struct nfs_openres *o_res = &data->o_res;
1484 int status;
1485
1486 status = nfs4_run_open_task(data, 1);
1427 if (status != 0 || !data->rpc_done) 1487 if (status != 0 || !data->rpc_done)
1428 return status; 1488 return status;
1429 1489
1430 if (o_res->fh.size == 0) 1490 nfs_refresh_inode(dir, o_res->dir_attr);
1431 _nfs4_proc_lookup(dir, o_arg->name, &o_res->fh, o_res->f_attr); 1491
1492 if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1493 status = _nfs4_proc_open_confirm(data);
1494 if (status != 0)
1495 return status;
1496 }
1497
1498 return status;
1499}
1500
1501/*
1502 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
1503 */
1504static int _nfs4_proc_open(struct nfs4_opendata *data)
1505{
1506 struct inode *dir = data->dir->d_inode;
1507 struct nfs_server *server = NFS_SERVER(dir);
1508 struct nfs_openargs *o_arg = &data->o_arg;
1509 struct nfs_openres *o_res = &data->o_res;
1510 int status;
1511
1512 status = nfs4_run_open_task(data, 0);
1513 if (status != 0 || !data->rpc_done)
1514 return status;
1432 1515
1433 if (o_arg->open_flags & O_CREAT) { 1516 if (o_arg->open_flags & O_CREAT) {
1434 update_changeattr(dir, &o_res->cinfo); 1517 update_changeattr(dir, &o_res->cinfo);
@@ -1752,11 +1835,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1752 if (calldata->arg.fmode == 0) 1835 if (calldata->arg.fmode == 0)
1753 break; 1836 break;
1754 default: 1837 default:
1755 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { 1838 if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
1756 nfs_restart_rpc(task, server->nfs_client); 1839 rpc_restart_call_prepare(task);
1757 return;
1758 }
1759 } 1840 }
1841 nfs_release_seqid(calldata->arg.seqid);
1760 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 1842 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
1761} 1843}
1762 1844
@@ -1848,8 +1930,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1848 calldata->state = state; 1930 calldata->state = state;
1849 calldata->arg.fh = NFS_FH(state->inode); 1931 calldata->arg.fh = NFS_FH(state->inode);
1850 calldata->arg.stateid = &state->open_stateid; 1932 calldata->arg.stateid = &state->open_stateid;
1851 if (nfs4_has_session(server->nfs_client))
1852 memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */
1853 /* Serialization for the sequence id */ 1933 /* Serialization for the sequence id */
1854 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1934 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1855 if (calldata->arg.seqid == NULL) 1935 if (calldata->arg.seqid == NULL)
@@ -3941,6 +4021,12 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
3941 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4021 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
3942} 4022}
3943 4023
4024static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
4025{
4026 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4027 nfs4_lock_prepare(task, calldata);
4028}
4029
3944static void nfs4_lock_done(struct rpc_task *task, void *calldata) 4030static void nfs4_lock_done(struct rpc_task *task, void *calldata)
3945{ 4031{
3946 struct nfs4_lockdata *data = calldata; 4032 struct nfs4_lockdata *data = calldata;
@@ -3996,7 +4082,13 @@ static const struct rpc_call_ops nfs4_lock_ops = {
3996 .rpc_release = nfs4_lock_release, 4082 .rpc_release = nfs4_lock_release,
3997}; 4083};
3998 4084
3999static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim) 4085static const struct rpc_call_ops nfs4_recover_lock_ops = {
4086 .rpc_call_prepare = nfs4_recover_lock_prepare,
4087 .rpc_call_done = nfs4_lock_done,
4088 .rpc_release = nfs4_lock_release,
4089};
4090
4091static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
4000{ 4092{
4001 struct nfs4_lockdata *data; 4093 struct nfs4_lockdata *data;
4002 struct rpc_task *task; 4094 struct rpc_task *task;
@@ -4020,8 +4112,11 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4020 return -ENOMEM; 4112 return -ENOMEM;
4021 if (IS_SETLKW(cmd)) 4113 if (IS_SETLKW(cmd))
4022 data->arg.block = 1; 4114 data->arg.block = 1;
4023 if (reclaim != 0) 4115 if (recovery_type > NFS_LOCK_NEW) {
4024 data->arg.reclaim = 1; 4116 if (recovery_type == NFS_LOCK_RECLAIM)
4117 data->arg.reclaim = NFS_LOCK_RECLAIM;
4118 task_setup_data.callback_ops = &nfs4_recover_lock_ops;
4119 }
4025 msg.rpc_argp = &data->arg, 4120 msg.rpc_argp = &data->arg,
4026 msg.rpc_resp = &data->res, 4121 msg.rpc_resp = &data->res,
4027 task_setup_data.callback_data = data; 4122 task_setup_data.callback_data = data;
@@ -4048,7 +4143,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4048 /* Cache the lock if possible... */ 4143 /* Cache the lock if possible... */
4049 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4144 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4050 return 0; 4145 return 0;
4051 err = _nfs4_do_setlk(state, F_SETLK, request, 1); 4146 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4052 if (err != -NFS4ERR_DELAY) 4147 if (err != -NFS4ERR_DELAY)
4053 break; 4148 break;
4054 nfs4_handle_exception(server, err, &exception); 4149 nfs4_handle_exception(server, err, &exception);
@@ -4068,7 +4163,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4068 do { 4163 do {
4069 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4164 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4070 return 0; 4165 return 0;
4071 err = _nfs4_do_setlk(state, F_SETLK, request, 0); 4166 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
4072 switch (err) { 4167 switch (err) {
4073 default: 4168 default:
4074 goto out; 4169 goto out;
@@ -4104,7 +4199,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4104 status = do_vfs_lock(request->fl_file, request); 4199 status = do_vfs_lock(request->fl_file, request);
4105 goto out_unlock; 4200 goto out_unlock;
4106 } 4201 }
4107 status = _nfs4_do_setlk(state, cmd, request, 0); 4202 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4108 if (status != 0) 4203 if (status != 0)
4109 goto out_unlock; 4204 goto out_unlock;
4110 /* Note: we always want to sleep here! */ 4205 /* Note: we always want to sleep here! */
@@ -4187,7 +4282,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4187 if (err != 0) 4282 if (err != 0)
4188 goto out; 4283 goto out;
4189 do { 4284 do {
4190 err = _nfs4_do_setlk(state, F_SETLK, fl, 0); 4285 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
4191 switch (err) { 4286 switch (err) {
4192 default: 4287 default:
4193 printk(KERN_ERR "%s: unhandled error %d.\n", 4288 printk(KERN_ERR "%s: unhandled error %d.\n",
@@ -4395,11 +4490,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
4395 (struct nfs4_get_lease_time_data *)calldata; 4490 (struct nfs4_get_lease_time_data *)calldata;
4396 4491
4397 dprintk("--> %s\n", __func__); 4492 dprintk("--> %s\n", __func__);
4493 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4398 /* just setup sequence, do not trigger session recovery 4494 /* just setup sequence, do not trigger session recovery
4399 since we're invoked within one */ 4495 since we're invoked within one */
4400 ret = nfs41_setup_sequence(data->clp->cl_session, 4496 ret = nfs41_setup_sequence(data->clp->cl_session,
4401 &data->args->la_seq_args, 4497 &data->args->la_seq_args,
4402 &data->res->lr_seq_res, 0, task); 4498 &data->res->lr_seq_res, 0, task);
4403 4499
4404 BUG_ON(ret == -EAGAIN); 4500 BUG_ON(ret == -EAGAIN);
4405 rpc_call_start(task); 4501 rpc_call_start(task);
@@ -4619,7 +4715,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4619 tbl = &session->fc_slot_table; 4715 tbl = &session->fc_slot_table;
4620 tbl->highest_used_slotid = -1; 4716 tbl->highest_used_slotid = -1;
4621 spin_lock_init(&tbl->slot_tbl_lock); 4717 spin_lock_init(&tbl->slot_tbl_lock);
4622 rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); 4718 rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
4623 4719
4624 tbl = &session->bc_slot_table; 4720 tbl = &session->bc_slot_table;
4625 tbl->highest_used_slotid = -1; 4721 tbl->highest_used_slotid = -1;
@@ -4838,14 +4934,22 @@ int nfs4_init_session(struct nfs_server *server)
4838{ 4934{
4839 struct nfs_client *clp = server->nfs_client; 4935 struct nfs_client *clp = server->nfs_client;
4840 struct nfs4_session *session; 4936 struct nfs4_session *session;
4937 unsigned int rsize, wsize;
4841 int ret; 4938 int ret;
4842 4939
4843 if (!nfs4_has_session(clp)) 4940 if (!nfs4_has_session(clp))
4844 return 0; 4941 return 0;
4845 4942
4943 rsize = server->rsize;
4944 if (rsize == 0)
4945 rsize = NFS_MAX_FILE_IO_SIZE;
4946 wsize = server->wsize;
4947 if (wsize == 0)
4948 wsize = NFS_MAX_FILE_IO_SIZE;
4949
4846 session = clp->cl_session; 4950 session = clp->cl_session;
4847 session->fc_attrs.max_rqst_sz = server->wsize + nfs41_maxwrite_overhead; 4951 session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
4848 session->fc_attrs.max_resp_sz = server->rsize + nfs41_maxread_overhead; 4952 session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
4849 4953
4850 ret = nfs4_recover_expired_lease(server); 4954 ret = nfs4_recover_expired_lease(server);
4851 if (!ret) 4955 if (!ret)
@@ -4871,7 +4975,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
4871 args.sa_cache_this = 0; 4975 args.sa_cache_this = 0;
4872 4976
4873 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, 4977 return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
4874 &res, 0); 4978 &res, args.sa_cache_this, 1);
4875} 4979}
4876 4980
4877void nfs41_sequence_call_done(struct rpc_task *task, void *data) 4981void nfs41_sequence_call_done(struct rpc_task *task, void *data)
@@ -4953,6 +5057,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
4953{ 5057{
4954 struct nfs4_reclaim_complete_data *calldata = data; 5058 struct nfs4_reclaim_complete_data *calldata = data;
4955 5059
5060 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
4956 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, 5061 if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
4957 &calldata->res.seq_res, 0, task)) 5062 &calldata->res.seq_res, 0, task))
4958 return; 5063 return;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e76427e6346f..6d263ed79e92 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -135,16 +135,30 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
135 return status; 135 return status;
136} 136}
137 137
138static void nfs41_end_drain_session(struct nfs_client *clp, 138static void nfs4_end_drain_session(struct nfs_client *clp)
139 struct nfs4_session *ses)
140{ 139{
141 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) 140 struct nfs4_session *ses = clp->cl_session;
142 rpc_wake_up(&ses->fc_slot_table.slot_tbl_waitq); 141 int max_slots;
142
143 if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
144 spin_lock(&ses->fc_slot_table.slot_tbl_lock);
145 max_slots = ses->fc_slot_table.max_slots;
146 while (max_slots--) {
147 struct rpc_task *task;
148
149 task = rpc_wake_up_next(&ses->fc_slot_table.
150 slot_tbl_waitq);
151 if (!task)
152 break;
153 rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
154 }
155 spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
156 }
143} 157}
144 158
145static int nfs41_begin_drain_session(struct nfs_client *clp, 159static int nfs4_begin_drain_session(struct nfs_client *clp)
146 struct nfs4_session *ses)
147{ 160{
161 struct nfs4_session *ses = clp->cl_session;
148 struct nfs4_slot_table *tbl = &ses->fc_slot_table; 162 struct nfs4_slot_table *tbl = &ses->fc_slot_table;
149 163
150 spin_lock(&tbl->slot_tbl_lock); 164 spin_lock(&tbl->slot_tbl_lock);
@@ -162,16 +176,13 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
162{ 176{
163 int status; 177 int status;
164 178
165 status = nfs41_begin_drain_session(clp, clp->cl_session); 179 nfs4_begin_drain_session(clp);
166 if (status != 0)
167 goto out;
168 status = nfs4_proc_exchange_id(clp, cred); 180 status = nfs4_proc_exchange_id(clp, cred);
169 if (status != 0) 181 if (status != 0)
170 goto out; 182 goto out;
171 status = nfs4_proc_create_session(clp); 183 status = nfs4_proc_create_session(clp);
172 if (status != 0) 184 if (status != 0)
173 goto out; 185 goto out;
174 nfs41_end_drain_session(clp, clp->cl_session);
175 nfs41_setup_state_renewal(clp); 186 nfs41_setup_state_renewal(clp);
176 nfs_mark_client_ready(clp, NFS_CS_READY); 187 nfs_mark_client_ready(clp, NFS_CS_READY);
177out: 188out:
@@ -755,16 +766,21 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
755 return new; 766 return new;
756} 767}
757 768
758void nfs_free_seqid(struct nfs_seqid *seqid) 769void nfs_release_seqid(struct nfs_seqid *seqid)
759{ 770{
760 if (!list_empty(&seqid->list)) { 771 if (!list_empty(&seqid->list)) {
761 struct rpc_sequence *sequence = seqid->sequence->sequence; 772 struct rpc_sequence *sequence = seqid->sequence->sequence;
762 773
763 spin_lock(&sequence->lock); 774 spin_lock(&sequence->lock);
764 list_del(&seqid->list); 775 list_del_init(&seqid->list);
765 spin_unlock(&sequence->lock); 776 spin_unlock(&sequence->lock);
766 rpc_wake_up(&sequence->wait); 777 rpc_wake_up(&sequence->wait);
767 } 778 }
779}
780
781void nfs_free_seqid(struct nfs_seqid *seqid)
782{
783 nfs_release_seqid(seqid);
768 kfree(seqid); 784 kfree(seqid);
769} 785}
770 786
@@ -1257,13 +1273,9 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1257 1273
1258static int nfs4_reset_session(struct nfs_client *clp) 1274static int nfs4_reset_session(struct nfs_client *clp)
1259{ 1275{
1260 struct nfs4_session *ses = clp->cl_session;
1261 int status; 1276 int status;
1262 1277
1263 status = nfs41_begin_drain_session(clp, ses); 1278 nfs4_begin_drain_session(clp);
1264 if (status != 0)
1265 return status;
1266
1267 status = nfs4_proc_destroy_session(clp->cl_session); 1279 status = nfs4_proc_destroy_session(clp->cl_session);
1268 if (status && status != -NFS4ERR_BADSESSION && 1280 if (status && status != -NFS4ERR_BADSESSION &&
1269 status != -NFS4ERR_DEADSESSION) { 1281 status != -NFS4ERR_DEADSESSION) {
@@ -1279,19 +1291,17 @@ static int nfs4_reset_session(struct nfs_client *clp)
1279out: 1291out:
1280 /* 1292 /*
1281 * Let the state manager reestablish state 1293 * Let the state manager reestablish state
1282 * without waking other tasks yet.
1283 */ 1294 */
1284 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1295 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1285 /* Wake up the next rpc task */ 1296 status == 0)
1286 nfs41_end_drain_session(clp, ses); 1297 nfs41_setup_state_renewal(clp);
1287 if (status == 0) 1298
1288 nfs41_setup_state_renewal(clp);
1289 }
1290 return status; 1299 return status;
1291} 1300}
1292 1301
1293#else /* CONFIG_NFS_V4_1 */ 1302#else /* CONFIG_NFS_V4_1 */
1294static int nfs4_reset_session(struct nfs_client *clp) { return 0; } 1303static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
1304static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
1295#endif /* CONFIG_NFS_V4_1 */ 1305#endif /* CONFIG_NFS_V4_1 */
1296 1306
1297/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors 1307/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1382,6 +1392,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1382 goto out_error; 1392 goto out_error;
1383 } 1393 }
1384 1394
1395 nfs4_end_drain_session(clp);
1385 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { 1396 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
1386 nfs_client_return_marked_delegations(clp); 1397 nfs_client_return_marked_delegations(clp);
1387 continue; 1398 continue;
@@ -1398,6 +1409,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1398out_error: 1409out_error:
1399 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1410 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
1400 " with error %d\n", clp->cl_hostname, -status); 1411 " with error %d\n", clp->cl_hostname, -status);
1412 nfs4_end_drain_session(clp);
1401 nfs4_clear_state_manager_bit(clp); 1413 nfs4_clear_state_manager_bit(clp);
1402} 1414}
1403 1415
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 8f9a20556f79..d3854d94b7cf 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -7,8 +7,6 @@
7#include <linux/types.h> 7#include <linux/types.h>
8#include <linux/file.h> 8#include <linux/file.h>
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/sunrpc/svc.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/syscall.h> 10#include <linux/nfsd/syscall.h>
13#include <linux/cred.h> 11#include <linux/cred.h>
14#include <linux/sched.h> 12#include <linux/sched.h>
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 36fcabbf5186..79717a40daba 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -1,15 +1,7 @@
1/* 1/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
2 * linux/fs/nfsd/auth.c
3 *
4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
5 */
6 2
7#include <linux/types.h>
8#include <linux/sched.h> 3#include <linux/sched.h>
9#include <linux/sunrpc/svc.h> 4#include "nfsd.h"
10#include <linux/sunrpc/svcauth.h>
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/export.h>
13#include "auth.h" 5#include "auth.h"
14 6
15int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) 7int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 000000000000..d892be61016c
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,83 @@
1/*
2 * Request reply cache. This was heavily inspired by the
3 * implementation in 4.3BSD/4.4BSD.
4 *
5 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef NFSCACHE_H
9#define NFSCACHE_H
10
11#include <linux/sunrpc/svc.h>
12
13/*
14 * Representation of a reply cache entry.
15 */
16struct svc_cacherep {
17 struct hlist_node c_hash;
18 struct list_head c_lru;
19
20 unsigned char c_state, /* unused, inprog, done */
21 c_type, /* status, buffer */
22 c_secure : 1; /* req came from port < 1024 */
23 struct sockaddr_in c_addr;
24 __be32 c_xid;
25 u32 c_prot;
26 u32 c_proc;
27 u32 c_vers;
28 unsigned long c_timestamp;
29 union {
30 struct kvec u_vec;
31 __be32 u_status;
32 } c_u;
33};
34
35#define c_replvec c_u.u_vec
36#define c_replstat c_u.u_status
37
38/* cache entry states */
39enum {
40 RC_UNUSED,
41 RC_INPROG,
42 RC_DONE
43};
44
45/* return values */
46enum {
47 RC_DROPIT,
48 RC_REPLY,
49 RC_DOIT,
50 RC_INTR
51};
52
53/*
54 * Cache types.
55 * We may want to add more types one day, e.g. for diropres and
56 * attrstat replies. Using cache entries with fixed length instead
57 * of buffer pointers may be more efficient.
58 */
59enum {
60 RC_NOCACHE,
61 RC_REPLSTAT,
62 RC_REPLBUFF,
63};
64
65/*
66 * If requests are retransmitted within this interval, they're dropped.
67 */
68#define RC_DELAY (HZ/5)
69
70int nfsd_reply_cache_init(void);
71void nfsd_reply_cache_shutdown(void);
72int nfsd_cache_lookup(struct svc_rqst *, int);
73void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
74
75#ifdef CONFIG_NFSD_V4
76void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
77#else /* CONFIG_NFSD_V4 */
78static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
79{
80}
81#endif /* CONFIG_NFSD_V4 */
82
83#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c1c9e035d4a4..c487810a2366 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/export.c
4 *
5 * NFS exporting and validation. 3 * NFS exporting and validation.
6 * 4 *
7 * We maintain a list of clients, each of which has a list of 5 * We maintain a list of clients, each of which has a list of
@@ -14,29 +12,16 @@
14 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> 12 * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
15 */ 13 */
16 14
17#include <linux/unistd.h>
18#include <linux/slab.h>
19#include <linux/stat.h>
20#include <linux/in.h>
21#include <linux/seq_file.h>
22#include <linux/syscalls.h>
23#include <linux/rwsem.h>
24#include <linux/dcache.h>
25#include <linux/namei.h> 15#include <linux/namei.h>
26#include <linux/mount.h>
27#include <linux/hash.h>
28#include <linux/module.h> 16#include <linux/module.h>
29#include <linux/exportfs.h> 17#include <linux/exportfs.h>
30 18
31#include <linux/sunrpc/svc.h>
32#include <linux/nfsd/nfsd.h>
33#include <linux/nfsd/nfsfh.h>
34#include <linux/nfsd/syscall.h> 19#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h>
36#include <linux/sunrpc/msg_prot.h>
37#include <linux/sunrpc/gss_api.h>
38#include <net/ipv6.h> 20#include <net/ipv6.h>
39 21
22#include "nfsd.h"
23#include "nfsfh.h"
24
40#define NFSDDBG_FACILITY NFSDDBG_EXPORT 25#define NFSDDBG_FACILITY NFSDDBG_EXPORT
41 26
42typedef struct auth_domain svc_client; 27typedef struct auth_domain svc_client;
@@ -369,16 +354,25 @@ static struct svc_export *svc_export_update(struct svc_export *new,
369 struct svc_export *old); 354 struct svc_export *old);
370static struct svc_export *svc_export_lookup(struct svc_export *); 355static struct svc_export *svc_export_lookup(struct svc_export *);
371 356
372static int check_export(struct inode *inode, int flags, unsigned char *uuid) 357static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
373{ 358{
374 359
375 /* We currently export only dirs and regular files. 360 /*
376 * This is what umountd does. 361 * We currently export only dirs, regular files, and (for v4
362 * pseudoroot) symlinks.
377 */ 363 */
378 if (!S_ISDIR(inode->i_mode) && 364 if (!S_ISDIR(inode->i_mode) &&
365 !S_ISLNK(inode->i_mode) &&
379 !S_ISREG(inode->i_mode)) 366 !S_ISREG(inode->i_mode))
380 return -ENOTDIR; 367 return -ENOTDIR;
381 368
369 /*
370 * Mountd should never pass down a writeable V4ROOT export, but,
371 * just to make sure:
372 */
373 if (*flags & NFSEXP_V4ROOT)
374 *flags |= NFSEXP_READONLY;
375
382 /* There are two requirements on a filesystem to be exportable. 376 /* There are two requirements on a filesystem to be exportable.
383 * 1: We must be able to identify the filesystem from a number. 377 * 1: We must be able to identify the filesystem from a number.
384 * either a device number (so FS_REQUIRES_DEV needed) 378 * either a device number (so FS_REQUIRES_DEV needed)
@@ -387,7 +381,7 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid)
387 * This means that s_export_op must be set. 381 * This means that s_export_op must be set.
388 */ 382 */
389 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && 383 if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
390 !(flags & NFSEXP_FSID) && 384 !(*flags & NFSEXP_FSID) &&
391 uuid == NULL) { 385 uuid == NULL) {
392 dprintk("exp_export: export of non-dev fs without fsid\n"); 386 dprintk("exp_export: export of non-dev fs without fsid\n");
393 return -EINVAL; 387 return -EINVAL;
@@ -602,7 +596,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
602 goto out4; 596 goto out4;
603 } 597 }
604 598
605 err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags, 599 err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
606 exp.ex_uuid); 600 exp.ex_uuid);
607 if (err) 601 if (err)
608 goto out4; 602 goto out4;
@@ -1041,7 +1035,7 @@ exp_export(struct nfsctl_export *nxp)
1041 goto finish; 1035 goto finish;
1042 } 1036 }
1043 1037
1044 err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL); 1038 err = check_export(path.dentry->d_inode, &nxp->ex_flags, NULL);
1045 if (err) goto finish; 1039 if (err) goto finish;
1046 1040
1047 err = -ENOMEM; 1041 err = -ENOMEM;
@@ -1320,6 +1314,23 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
1320 return exp; 1314 return exp;
1321} 1315}
1322 1316
1317static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
1318{
1319 struct svc_export *exp;
1320 u32 fsidv[2];
1321
1322 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
1323
1324 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
1325 /*
1326 * We shouldn't have accepting an nfsv4 request at all if we
1327 * don't have a pseudoexport!:
1328 */
1329 if (IS_ERR(exp) && PTR_ERR(exp) == -ENOENT)
1330 exp = ERR_PTR(-ESERVERFAULT);
1331 return exp;
1332}
1333
1323/* 1334/*
1324 * Called when we need the filehandle for the root of the pseudofs, 1335 * Called when we need the filehandle for the root of the pseudofs,
1325 * for a given NFSv4 client. The root is defined to be the 1336 * for a given NFSv4 client. The root is defined to be the
@@ -1330,11 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
1330{ 1341{
1331 struct svc_export *exp; 1342 struct svc_export *exp;
1332 __be32 rv; 1343 __be32 rv;
1333 u32 fsidv[2];
1334 1344
1335 mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); 1345 exp = find_fsidzero_export(rqstp);
1336
1337 exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
1338 if (IS_ERR(exp)) 1346 if (IS_ERR(exp))
1339 return nfserrno(PTR_ERR(exp)); 1347 return nfserrno(PTR_ERR(exp));
1340 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); 1348 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
@@ -1425,6 +1433,7 @@ static struct flags {
1425 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, 1433 { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
1426 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, 1434 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
1427 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, 1435 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
1436 { NFSEXP_V4ROOT, {"v4root", ""}},
1428#ifdef MSNFS 1437#ifdef MSNFS
1429 { NFSEXP_MSNFS, {"msnfs", ""}}, 1438 { NFSEXP_MSNFS, {"msnfs", ""}},
1430#endif 1439#endif
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index b2786a5f9afe..0c6d81670137 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/lockd.c
3 *
4 * This file contains all the stubs needed when communicating with lockd. 2 * This file contains all the stubs needed when communicating with lockd.
5 * This level of indirection is necessary so we can run nfsd+lockd without 3 * This level of indirection is necessary so we can run nfsd+lockd without
6 * requiring the nfs client to be compiled in/loaded, and vice versa. 4 * requiring the nfs client to be compiled in/loaded, and vice versa.
@@ -8,14 +6,10 @@
8 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/fs.h>
13#include <linux/file.h> 9#include <linux/file.h>
14#include <linux/mount.h>
15#include <linux/sunrpc/clnt.h>
16#include <linux/sunrpc/svc.h>
17#include <linux/nfsd/nfsd.h>
18#include <linux/lockd/bind.h> 10#include <linux/lockd/bind.h>
11#include "nfsd.h"
12#include "vfs.h"
19 13
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 14#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 15
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3219e84116..f20589d2ae27 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -1,19 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs2acl.c
3 *
4 * Process version 2 NFSACL requests. 2 * Process version 2 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr.h>
14#include <linux/nfsd/xdr3.h>
15#include <linux/posix_acl.h>
16#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
17 13
18#define NFSDDBG_FACILITY NFSDDBG_PROC 14#define NFSDDBG_FACILITY NFSDDBG_PROC
19#define RETURN_STATUS(st) { resp->status = (st); return (st); } 15#define RETURN_STATUS(st) { resp->status = (st); return (st); }
@@ -217,6 +213,16 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
217 * XDR encode functions 213 * XDR encode functions
218 */ 214 */
219 215
216/*
217 * There must be an encoding function for void results so svc_process
218 * will work properly.
219 */
220int
221nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
222{
223 return xdr_ressize_check(rqstp, p);
224}
225
220/* GETACL */ 226/* GETACL */
221static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, 227static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
222 struct nfsd3_getaclres *resp) 228 struct nfsd3_getaclres *resp)
@@ -308,7 +314,6 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
308} 314}
309 315
310#define nfsaclsvc_decode_voidargs NULL 316#define nfsaclsvc_decode_voidargs NULL
311#define nfsaclsvc_encode_voidres NULL
312#define nfsaclsvc_release_void NULL 317#define nfsaclsvc_release_void NULL
313#define nfsd3_fhandleargs nfsd_fhandle 318#define nfsd3_fhandleargs nfsd_fhandle
314#define nfsd3_attrstatres nfsd_attrstat 319#define nfsd3_attrstatres nfsd_attrstat
@@ -346,5 +351,5 @@ struct svc_version nfsd_acl_version2 = {
346 .vs_proc = nfsd_acl_procedures2, 351 .vs_proc = nfsd_acl_procedures2,
347 .vs_dispatch = nfsd_dispatch, 352 .vs_dispatch = nfsd_dispatch,
348 .vs_xdrsize = NFS3_SVC_XDRSIZE, 353 .vs_xdrsize = NFS3_SVC_XDRSIZE,
349 .vs_hidden = 1, 354 .vs_hidden = 0,
350}; 355};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9981dbb377a3..e0c4846bad92 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -1,18 +1,15 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3acl.c
3 *
4 * Process version 3 NFSACL requests. 2 * Process version 3 NFSACL requests.
5 * 3 *
6 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> 4 * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
7 */ 5 */
8 6
9#include <linux/sunrpc/svc.h> 7#include "nfsd.h"
10#include <linux/nfs3.h> 8/* FIXME: nfsacl.h is a broken header */
11#include <linux/nfsd/nfsd.h>
12#include <linux/nfsd/cache.h>
13#include <linux/nfsd/xdr3.h>
14#include <linux/posix_acl.h>
15#include <linux/nfsacl.h> 9#include <linux/nfsacl.h>
10#include "cache.h"
11#include "xdr3.h"
12#include "vfs.h"
16 13
17#define RETURN_STATUS(st) { resp->status = (st); return (st); } 14#define RETURN_STATUS(st) { resp->status = (st); return (st); }
18 15
@@ -264,6 +261,6 @@ struct svc_version nfsd_acl_version3 = {
264 .vs_proc = nfsd_acl_procedures3, 261 .vs_proc = nfsd_acl_procedures3,
265 .vs_dispatch = nfsd_dispatch, 262 .vs_dispatch = nfsd_dispatch,
266 .vs_xdrsize = NFS3_SVC_XDRSIZE, 263 .vs_xdrsize = NFS3_SVC_XDRSIZE,
267 .vs_hidden = 1, 264 .vs_hidden = 0,
268}; 265};
269 266
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c418a922..3d68f45a37b9 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -1,30 +1,16 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3proc.c
3 *
4 * Process version 3 NFS requests. 2 * Process version 3 NFS requests.
5 * 3 *
6 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/linkage.h>
10#include <linux/time.h>
11#include <linux/errno.h>
12#include <linux/fs.h> 7#include <linux/fs.h>
13#include <linux/ext2_fs.h> 8#include <linux/ext2_fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/major.h>
21#include <linux/magic.h> 9#include <linux/magic.h>
22 10
23#include <linux/sunrpc/svc.h> 11#include "cache.h"
24#include <linux/nfsd/nfsd.h> 12#include "xdr3.h"
25#include <linux/nfsd/cache.h> 13#include "vfs.h"
26#include <linux/nfsd/xdr3.h>
27#include <linux/nfs3.h>
28 14
29#define NFSDDBG_FACILITY NFSDDBG_PROC 15#define NFSDDBG_FACILITY NFSDDBG_PROC
30 16
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d0a2ce1b4324..2a533a0af2a9 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs3xdr.c
3 *
4 * XDR support for nfsd/protocol version 3. 2 * XDR support for nfsd/protocol version 3.
5 * 3 *
6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -8,19 +6,8 @@
8 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()! 6 * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
9 */ 7 */
10 8
11#include <linux/types.h>
12#include <linux/time.h>
13#include <linux/nfs3.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <linux/dcache.h>
17#include <linux/namei.h> 9#include <linux/namei.h>
18#include <linux/mm.h> 10#include "xdr3.h"
19#include <linux/vfs.h>
20#include <linux/sunrpc/xdr.h>
21#include <linux/sunrpc/svc.h>
22#include <linux/nfsd/nfsd.h>
23#include <linux/nfsd/xdr3.h>
24#include "auth.h" 11#include "auth.h"
25 12
26#define NFSDDBG_FACILITY NFSDDBG_XDR 13#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 725d02f210e2..88150685df34 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfs4acl/acl.c
3 *
4 * Common NFSv4 ACL handling code. 2 * Common NFSv4 ACL handling code.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of Michigan. 4 * Copyright (c) 2002, 2003 The Regents of the University of Michigan.
@@ -36,15 +34,7 @@
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */ 35 */
38 36
39#include <linux/string.h>
40#include <linux/slab.h>
41#include <linux/list.h>
42#include <linux/types.h>
43#include <linux/fs.h>
44#include <linux/module.h>
45#include <linux/nfs_fs.h> 37#include <linux/nfs_fs.h>
46#include <linux/posix_acl.h>
47#include <linux/nfs4.h>
48#include <linux/nfs4_acl.h> 38#include <linux/nfs4_acl.h>
49 39
50 40
@@ -389,7 +379,7 @@ sort_pacl(struct posix_acl *pacl)
389 sort_pacl_range(pacl, 1, i-1); 379 sort_pacl_range(pacl, 1, i-1);
390 380
391 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); 381 BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
392 j = i++; 382 j = ++i;
393 while (pacl->a_entries[j].e_tag == ACL_GROUP) 383 while (pacl->a_entries[j].e_tag == ACL_GROUP)
394 j++; 384 j++;
395 sort_pacl_range(pacl, i, j-1); 385 sort_pacl_range(pacl, i, j-1);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 24e8d78f8dde..c6eed2a3b093 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfs4callback.c
3 *
4 * Copyright (c) 2001 The Regents of the University of Michigan. 2 * Copyright (c) 2001 The Regents of the University of Michigan.
5 * All rights reserved. 3 * All rights reserved.
6 * 4 *
@@ -33,22 +31,9 @@
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */ 32 */
35 33
36#include <linux/module.h>
37#include <linux/list.h>
38#include <linux/inet.h>
39#include <linux/errno.h>
40#include <linux/delay.h>
41#include <linux/sched.h>
42#include <linux/kthread.h>
43#include <linux/sunrpc/xdr.h>
44#include <linux/sunrpc/svc.h>
45#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
46#include <linux/sunrpc/svcsock.h> 35#include "nfsd.h"
47#include <linux/nfsd/nfsd.h> 36#include "state.h"
48#include <linux/nfsd/state.h>
49#include <linux/sunrpc/sched.h>
50#include <linux/nfs4.h>
51#include <linux/sunrpc/xprtsock.h>
52 37
53#define NFSDDBG_FACILITY NFSDDBG_PROC 38#define NFSDDBG_FACILITY NFSDDBG_PROC
54 39
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index ba2c199592fd..6e2983b27f3c 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4idmap.c
3 *
4 * Mapping of UID/GIDs to name and vice versa. 2 * Mapping of UID/GIDs to name and vice versa.
5 * 3 *
6 * Copyright (c) 2002, 2003 The Regents of the University of 4 * Copyright (c) 2002, 2003 The Regents of the University of
@@ -35,22 +33,9 @@
35 */ 33 */
36 34
37#include <linux/module.h> 35#include <linux/module.h>
38#include <linux/init.h>
39
40#include <linux/mm.h>
41#include <linux/errno.h>
42#include <linux/string.h>
43#include <linux/sunrpc/clnt.h>
44#include <linux/nfs.h>
45#include <linux/nfs4.h>
46#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h>
48#include <linux/sunrpc/cache.h>
49#include <linux/nfsd_idmap.h> 36#include <linux/nfsd_idmap.h>
50#include <linux/list.h>
51#include <linux/time.h>
52#include <linux/seq_file.h> 37#include <linux/seq_file.h>
53#include <linux/sunrpc/svcauth.h> 38#include <linux/sched.h>
54 39
55/* 40/*
56 * Cache entry 41 * Cache entry
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2e1b0a..37514c469846 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * fs/nfsd/nfs4proc.c
3 *
4 * Server-side procedures for NFSv4. 2 * Server-side procedures for NFSv4.
5 * 3 *
6 * Copyright (c) 2002 The Regents of the University of Michigan. 4 * Copyright (c) 2002 The Regents of the University of Michigan.
@@ -34,20 +32,11 @@
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */ 34 */
37
38#include <linux/param.h>
39#include <linux/major.h>
40#include <linux/slab.h>
41#include <linux/file.h> 35#include <linux/file.h>
42 36
43#include <linux/sunrpc/svc.h> 37#include "cache.h"
44#include <linux/nfsd/nfsd.h> 38#include "xdr4.h"
45#include <linux/nfsd/cache.h> 39#include "vfs.h"
46#include <linux/nfs4.h>
47#include <linux/nfsd/state.h>
48#include <linux/nfsd/xdr4.h>
49#include <linux/nfs4_acl.h>
50#include <linux/sunrpc/gss_api.h>
51 40
52#define NFSDDBG_FACILITY NFSDDBG_PROC 41#define NFSDDBG_FACILITY NFSDDBG_PROC
53 42
@@ -170,7 +159,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
170 accmode |= NFSD_MAY_READ; 159 accmode |= NFSD_MAY_READ;
171 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 160 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
172 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC); 161 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
173 if (open->op_share_deny & NFS4_SHARE_DENY_WRITE) 162 if (open->op_share_deny & NFS4_SHARE_DENY_READ)
174 accmode |= NFSD_MAY_WRITE; 163 accmode |= NFSD_MAY_WRITE;
175 164
176 status = fh_verify(rqstp, current_fh, S_IFREG, accmode); 165 status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b5348405046b..5a754f7b71ed 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4recover.c
3*
4* Copyright (c) 2004 The Regents of the University of Michigan. 2* Copyright (c) 2004 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -33,20 +31,14 @@
33* 31*
34*/ 32*/
35 33
36#include <linux/err.h>
37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h>
40#include <linux/nfsd/state.h>
41#include <linux/nfsd/xdr4.h>
42#include <linux/param.h>
43#include <linux/file.h> 34#include <linux/file.h>
44#include <linux/namei.h> 35#include <linux/namei.h>
45#include <asm/uaccess.h>
46#include <linux/scatterlist.h>
47#include <linux/crypto.h> 36#include <linux/crypto.h>
48#include <linux/sched.h> 37#include <linux/sched.h>
49#include <linux/mount.h> 38
39#include "nfsd.h"
40#include "state.h"
41#include "vfs.h"
50 42
51#define NFSDDBG_FACILITY NFSDDBG_PROC 43#define NFSDDBG_FACILITY NFSDDBG_PROC
52 44
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2153f9bdbebd..f19ed866c95f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1,6 +1,4 @@
1/* 1/*
2* linux/fs/nfsd/nfs4state.c
3*
4* Copyright (c) 2001 The Regents of the University of Michigan. 2* Copyright (c) 2001 The Regents of the University of Michigan.
5* All rights reserved. 3* All rights reserved.
6* 4*
@@ -34,28 +32,14 @@
34* 32*
35*/ 33*/
36 34
37#include <linux/param.h>
38#include <linux/major.h>
39#include <linux/slab.h>
40
41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/cache.h>
44#include <linux/file.h> 35#include <linux/file.h>
45#include <linux/mount.h>
46#include <linux/workqueue.h>
47#include <linux/smp_lock.h> 36#include <linux/smp_lock.h>
48#include <linux/kthread.h>
49#include <linux/nfs4.h>
50#include <linux/nfsd/state.h>
51#include <linux/nfsd/xdr4.h>
52#include <linux/namei.h> 37#include <linux/namei.h>
53#include <linux/swap.h> 38#include <linux/swap.h>
54#include <linux/mutex.h>
55#include <linux/lockd/bind.h>
56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h> 39#include <linux/sunrpc/svcauth_gss.h>
58#include <linux/sunrpc/clnt.h> 40#include <linux/sunrpc/clnt.h>
41#include "xdr4.h"
42#include "vfs.h"
59 43
60#define NFSDDBG_FACILITY NFSDDBG_PROC 44#define NFSDDBG_FACILITY NFSDDBG_PROC
61 45
@@ -477,13 +461,14 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
477 461
478/* 462/*
479 * fchan holds the client values on input, and the server values on output 463 * fchan holds the client values on input, and the server values on output
464 * sv_max_mesg is the maximum payload plus one page for overhead.
480 */ 465 */
481static int init_forechannel_attrs(struct svc_rqst *rqstp, 466static int init_forechannel_attrs(struct svc_rqst *rqstp,
482 struct nfsd4_channel_attrs *session_fchan, 467 struct nfsd4_channel_attrs *session_fchan,
483 struct nfsd4_channel_attrs *fchan) 468 struct nfsd4_channel_attrs *fchan)
484{ 469{
485 int status = 0; 470 int status = 0;
486 __u32 maxcount = svc_max_payload(rqstp); 471 __u32 maxcount = nfsd_serv->sv_max_mesg;
487 472
488 /* headerpadsz set to zero in encode routine */ 473 /* headerpadsz set to zero in encode routine */
489 474
@@ -523,6 +508,15 @@ free_session_slots(struct nfsd4_session *ses)
523 kfree(ses->se_slots[i]); 508 kfree(ses->se_slots[i]);
524} 509}
525 510
511/*
512 * We don't actually need to cache the rpc and session headers, so we
513 * can allocate a little less for each slot:
514 */
515static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
516{
517 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
518}
519
526static int 520static int
527alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, 521alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
528 struct nfsd4_create_session *cses) 522 struct nfsd4_create_session *cses)
@@ -554,7 +548,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
554 memcpy(new, &tmp, sizeof(*new)); 548 memcpy(new, &tmp, sizeof(*new));
555 549
556 /* allocate each struct nfsd4_slot and data cache in one piece */ 550 /* allocate each struct nfsd4_slot and data cache in one piece */
557 cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 551 cachesize = slot_bytes(&new->se_fchannel);
558 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 552 for (i = 0; i < new->se_fchannel.maxreqs; i++) {
559 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 553 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
560 if (!sp) 554 if (!sp)
@@ -628,10 +622,12 @@ void
628free_session(struct kref *kref) 622free_session(struct kref *kref)
629{ 623{
630 struct nfsd4_session *ses; 624 struct nfsd4_session *ses;
625 int mem;
631 626
632 ses = container_of(kref, struct nfsd4_session, se_ref); 627 ses = container_of(kref, struct nfsd4_session, se_ref);
633 spin_lock(&nfsd_drc_lock); 628 spin_lock(&nfsd_drc_lock);
634 nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; 629 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
630 nfsd_drc_mem_used -= mem;
635 spin_unlock(&nfsd_drc_lock); 631 spin_unlock(&nfsd_drc_lock);
636 free_session_slots(ses); 632 free_session_slots(ses);
637 kfree(ses); 633 kfree(ses);
@@ -2404,11 +2400,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2404 2400
2405 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2401 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2406 2402
2407 dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", 2403 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
2408 dp->dl_stateid.si_boot, 2404 STATEID_VAL(&dp->dl_stateid));
2409 dp->dl_stateid.si_stateownerid,
2410 dp->dl_stateid.si_fileid,
2411 dp->dl_stateid.si_generation);
2412out: 2405out:
2413 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS 2406 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
2414 && flag == NFS4_OPEN_DELEGATE_NONE 2407 && flag == NFS4_OPEN_DELEGATE_NONE
@@ -2498,9 +2491,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2498 2491
2499 status = nfs_ok; 2492 status = nfs_ok;
2500 2493
2501 dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", 2494 dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
2502 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, 2495 STATEID_VAL(&stp->st_stateid));
2503 stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
2504out: 2496out:
2505 if (fp) 2497 if (fp)
2506 put_nfs4_file(fp); 2498 put_nfs4_file(fp);
@@ -2666,9 +2658,8 @@ STALE_STATEID(stateid_t *stateid)
2666{ 2658{
2667 if (time_after((unsigned long)boot_time, 2659 if (time_after((unsigned long)boot_time,
2668 (unsigned long)stateid->si_boot)) { 2660 (unsigned long)stateid->si_boot)) {
2669 dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", 2661 dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
2670 stateid->si_boot, stateid->si_stateownerid, 2662 STATEID_VAL(stateid));
2671 stateid->si_fileid, stateid->si_generation);
2672 return 1; 2663 return 1;
2673 } 2664 }
2674 return 0; 2665 return 0;
@@ -2680,9 +2671,8 @@ EXPIRED_STATEID(stateid_t *stateid)
2680 if (time_before((unsigned long)boot_time, 2671 if (time_before((unsigned long)boot_time,
2681 ((unsigned long)stateid->si_boot)) && 2672 ((unsigned long)stateid->si_boot)) &&
2682 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { 2673 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2683 dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n", 2674 dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
2684 stateid->si_boot, stateid->si_stateownerid, 2675 STATEID_VAL(stateid));
2685 stateid->si_fileid, stateid->si_generation);
2686 return 1; 2676 return 1;
2687 } 2677 }
2688 return 0; 2678 return 0;
@@ -2696,9 +2686,8 @@ stateid_error_map(stateid_t *stateid)
2696 if (EXPIRED_STATEID(stateid)) 2686 if (EXPIRED_STATEID(stateid))
2697 return nfserr_expired; 2687 return nfserr_expired;
2698 2688
2699 dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n", 2689 dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
2700 stateid->si_boot, stateid->si_stateownerid, 2690 STATEID_VAL(stateid));
2701 stateid->si_fileid, stateid->si_generation);
2702 return nfserr_bad_stateid; 2691 return nfserr_bad_stateid;
2703} 2692}
2704 2693
@@ -2884,10 +2873,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2884 struct svc_fh *current_fh = &cstate->current_fh; 2873 struct svc_fh *current_fh = &cstate->current_fh;
2885 __be32 status; 2874 __be32 status;
2886 2875
2887 dprintk("NFSD: preprocess_seqid_op: seqid=%d " 2876 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
2888 "stateid = (%08x/%08x/%08x/%08x)\n", seqid, 2877 seqid, STATEID_VAL(stateid));
2889 stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
2890 stateid->si_generation);
2891 2878
2892 *stpp = NULL; 2879 *stpp = NULL;
2893 *sopp = NULL; 2880 *sopp = NULL;
@@ -3019,12 +3006,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3019 sop->so_confirmed = 1; 3006 sop->so_confirmed = 1;
3020 update_stateid(&stp->st_stateid); 3007 update_stateid(&stp->st_stateid);
3021 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); 3008 memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
3022 dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 3009 dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
3023 "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, 3010 __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid));
3024 stp->st_stateid.si_boot,
3025 stp->st_stateid.si_stateownerid,
3026 stp->st_stateid.si_fileid,
3027 stp->st_stateid.si_generation);
3028 3011
3029 nfsd4_create_clid_dir(sop->so_client); 3012 nfsd4_create_clid_dir(sop->so_client);
3030out: 3013out:
@@ -3283,9 +3266,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid)
3283 struct nfs4_file *fp; 3266 struct nfs4_file *fp;
3284 struct nfs4_delegation *dl; 3267 struct nfs4_delegation *dl;
3285 3268
3286 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", 3269 dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
3287 stid->si_boot, stid->si_stateownerid, 3270 STATEID_VAL(stid));
3288 stid->si_fileid, stid->si_generation);
3289 3271
3290 fp = find_file(ino); 3272 fp = find_file(ino);
3291 if (!fp) 3273 if (!fp)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0fbd50cee1f6..a8587e90fd5a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,24 +40,16 @@
40 * at the end of nfs4svc_decode_compoundargs. 40 * at the end of nfs4svc_decode_compoundargs.
41 */ 41 */
42 42
43#include <linux/param.h>
44#include <linux/smp.h>
45#include <linux/fs.h>
46#include <linux/namei.h> 43#include <linux/namei.h>
47#include <linux/vfs.h> 44#include <linux/statfs.h>
48#include <linux/utsname.h> 45#include <linux/utsname.h>
49#include <linux/sunrpc/xdr.h>
50#include <linux/sunrpc/svc.h>
51#include <linux/sunrpc/clnt.h>
52#include <linux/nfsd/nfsd.h>
53#include <linux/nfsd/state.h>
54#include <linux/nfsd/xdr4.h>
55#include <linux/nfsd_idmap.h> 46#include <linux/nfsd_idmap.h>
56#include <linux/nfs4.h>
57#include <linux/nfs4_acl.h> 47#include <linux/nfs4_acl.h>
58#include <linux/sunrpc/gss_api.h>
59#include <linux/sunrpc/svcauth_gss.h> 48#include <linux/sunrpc/svcauth_gss.h>
60 49
50#include "xdr4.h"
51#include "vfs.h"
52
61#define NFSDDBG_FACILITY NFSDDBG_XDR 53#define NFSDDBG_FACILITY NFSDDBG_XDR
62 54
63/* 55/*
@@ -2204,11 +2196,14 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2204 * we will not follow the cross mount and will fill the attribtutes 2196 * we will not follow the cross mount and will fill the attribtutes
2205 * directly from the mountpoint dentry. 2197 * directly from the mountpoint dentry.
2206 */ 2198 */
2207 if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval)) 2199 if (nfsd_mountpoint(dentry, exp)) {
2208 ignore_crossmnt = 1;
2209 else if (d_mountpoint(dentry)) {
2210 int err; 2200 int err;
2211 2201
2202 if (!(exp->ex_flags & NFSEXP_V4ROOT)
2203 && !attributes_need_mount(cd->rd_bmval)) {
2204 ignore_crossmnt = 1;
2205 goto out_encode;
2206 }
2212 /* 2207 /*
2213 * Why the heck aren't we just using nfsd_lookup?? 2208 * Why the heck aren't we just using nfsd_lookup??
2214 * Different "."/".." handling? Something else? 2209 * Different "."/".." handling? Something else?
@@ -2224,6 +2219,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
2224 goto out_put; 2219 goto out_put;
2225 2220
2226 } 2221 }
2222out_encode:
2227 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, 2223 nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
2228 cd->rd_rqstp, ignore_crossmnt); 2224 cd->rd_rqstp, ignore_crossmnt);
2229out_put: 2225out_put:
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 4638635c5d87..da08560c4818 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfscache.c
3 *
4 * Request reply cache. This is currently a global cache, but this may 2 * Request reply cache. This is currently a global cache, but this may
5 * change in the future and be a per-client cache. 3 * change in the future and be a per-client cache.
6 * 4 *
@@ -10,16 +8,8 @@
10 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 8 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
11 */ 9 */
12 10
13#include <linux/kernel.h> 11#include "nfsd.h"
14#include <linux/time.h> 12#include "cache.h"
15#include <linux/slab.h>
16#include <linux/string.h>
17#include <linux/spinlock.h>
18#include <linux/list.h>
19
20#include <linux/sunrpc/svc.h>
21#include <linux/nfsd/nfsd.h>
22#include <linux/nfsd/cache.h>
23 13
24/* Size of reply cache. Common values are: 14/* Size of reply cache. Common values are:
25 * 4.3BSD: 128 15 * 4.3BSD: 128
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c01fc148ce8..2604c3e70ea5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1,46 +1,20 @@
1/* 1/*
2 * linux/fs/nfsd/nfsctl.c
3 *
4 * Syscall interface to knfsd. 2 * Syscall interface to knfsd.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/module.h>
10
11#include <linux/linkage.h>
12#include <linux/time.h>
13#include <linux/errno.h>
14#include <linux/fs.h>
15#include <linux/namei.h> 7#include <linux/namei.h>
16#include <linux/fcntl.h>
17#include <linux/net.h>
18#include <linux/in.h>
19#include <linux/syscalls.h>
20#include <linux/unistd.h>
21#include <linux/slab.h>
22#include <linux/proc_fs.h>
23#include <linux/seq_file.h>
24#include <linux/pagemap.h>
25#include <linux/init.h>
26#include <linux/inet.h>
27#include <linux/string.h>
28#include <linux/ctype.h> 8#include <linux/ctype.h>
29 9
30#include <linux/nfs.h>
31#include <linux/nfsd_idmap.h> 10#include <linux/nfsd_idmap.h>
32#include <linux/lockd/bind.h>
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/svcsock.h> 11#include <linux/sunrpc/svcsock.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/cache.h>
37#include <linux/nfsd/xdr.h>
38#include <linux/nfsd/syscall.h> 12#include <linux/nfsd/syscall.h>
39#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
40#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
41 15
42#include <asm/uaccess.h> 16#include "nfsd.h"
43#include <net/ipv6.h> 17#include "cache.h"
44 18
45/* 19/*
46 * We have a single directory with 9 nodes in it. 20 * We have a single directory with 9 nodes in it.
@@ -55,6 +29,7 @@ enum {
55 NFSD_Getfd, 29 NFSD_Getfd,
56 NFSD_Getfs, 30 NFSD_Getfs,
57 NFSD_List, 31 NFSD_List,
32 NFSD_Export_features,
58 NFSD_Fh, 33 NFSD_Fh,
59 NFSD_FO_UnlockIP, 34 NFSD_FO_UnlockIP,
60 NFSD_FO_UnlockFS, 35 NFSD_FO_UnlockFS,
@@ -173,6 +148,24 @@ static const struct file_operations exports_operations = {
173 .owner = THIS_MODULE, 148 .owner = THIS_MODULE,
174}; 149};
175 150
151static int export_features_show(struct seq_file *m, void *v)
152{
153 seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
154 return 0;
155}
156
157static int export_features_open(struct inode *inode, struct file *file)
158{
159 return single_open(file, export_features_show, NULL);
160}
161
162static struct file_operations export_features_operations = {
163 .open = export_features_open,
164 .read = seq_read,
165 .llseek = seq_lseek,
166 .release = single_release,
167};
168
176extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 169extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
177extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 170extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
178 171
@@ -1330,6 +1323,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1330 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1323 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1331 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1324 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1332 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1325 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1326 [NFSD_Export_features] = {"export_features",
1327 &export_features_operations, S_IRUGO},
1333 [NFSD_FO_UnlockIP] = {"unlock_ip", 1328 [NFSD_FO_UnlockIP] = {"unlock_ip",
1334 &transaction_ops, S_IWUSR|S_IRUSR}, 1329 &transaction_ops, S_IWUSR|S_IRUSR},
1335 [NFSD_FO_UnlockFS] = {"unlock_filesystem", 1330 [NFSD_FO_UnlockFS] = {"unlock_filesystem",
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 000000000000..e942a1aaac92
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,338 @@
1/*
2 * Hodge-podge collection of knfsd-related stuff.
3 * I will sort this out later.
4 *
5 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
6 */
7
8#ifndef LINUX_NFSD_NFSD_H
9#define LINUX_NFSD_NFSD_H
10
11#include <linux/types.h>
12#include <linux/mount.h>
13
14#include <linux/nfsd/debug.h>
15#include <linux/nfsd/export.h>
16#include <linux/nfsd/stats.h>
17/*
18 * nfsd version
19 */
20#define NFSD_SUPPORTED_MINOR_VERSION 1
21
22struct readdir_cd {
23 __be32 err; /* 0, nfserr, or nfserr_eof */
24};
25
26
27extern struct svc_program nfsd_program;
28extern struct svc_version nfsd_version2, nfsd_version3,
29 nfsd_version4;
30extern u32 nfsd_supported_minorversion;
31extern struct mutex nfsd_mutex;
32extern struct svc_serv *nfsd_serv;
33extern spinlock_t nfsd_drc_lock;
34extern unsigned int nfsd_drc_max_mem;
35extern unsigned int nfsd_drc_mem_used;
36
37extern const struct seq_operations nfs_exports_op;
38
39/*
40 * Function prototypes.
41 */
42int nfsd_svc(unsigned short port, int nrservs);
43int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
44
45int nfsd_nrthreads(void);
46int nfsd_nrpools(void);
47int nfsd_get_nrthreads(int n, int *);
48int nfsd_set_nrthreads(int n, int *);
49
50#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
51#ifdef CONFIG_NFSD_V2_ACL
52extern struct svc_version nfsd_acl_version2;
53#else
54#define nfsd_acl_version2 NULL
55#endif
56#ifdef CONFIG_NFSD_V3_ACL
57extern struct svc_version nfsd_acl_version3;
58#else
59#define nfsd_acl_version3 NULL
60#endif
61#endif
62
63enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
64int nfsd_vers(int vers, enum vers_op change);
65int nfsd_minorversion(u32 minorversion, enum vers_op change);
66void nfsd_reset_versions(void);
67int nfsd_create_serv(void);
68
69extern int nfsd_max_blksize;
70
71static inline int nfsd_v4client(struct svc_rqst *rq)
72{
73 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
74}
75
76/*
77 * NFSv4 State
78 */
79#ifdef CONFIG_NFSD_V4
80extern unsigned int max_delegations;
81int nfs4_state_init(void);
82void nfsd4_free_slabs(void);
83int nfs4_state_start(void);
84void nfs4_state_shutdown(void);
85time_t nfs4_lease_time(void);
86void nfs4_reset_lease(time_t leasetime);
87int nfs4_reset_recoverydir(char *recdir);
88#else
89static inline int nfs4_state_init(void) { return 0; }
90static inline void nfsd4_free_slabs(void) { }
91static inline int nfs4_state_start(void) { return 0; }
92static inline void nfs4_state_shutdown(void) { }
93static inline time_t nfs4_lease_time(void) { return 0; }
94static inline void nfs4_reset_lease(time_t leasetime) { }
95static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
96#endif
97
98/*
99 * lockd binding
100 */
101void nfsd_lockd_init(void);
102void nfsd_lockd_shutdown(void);
103
104
105/*
106 * These macros provide pre-xdr'ed values for faster operation.
107 */
108#define nfs_ok cpu_to_be32(NFS_OK)
109#define nfserr_perm cpu_to_be32(NFSERR_PERM)
110#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
111#define nfserr_io cpu_to_be32(NFSERR_IO)
112#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
113#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
114#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
115#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
116#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)
117#define nfserr_nodev cpu_to_be32(NFSERR_NODEV)
118#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR)
119#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR)
120#define nfserr_inval cpu_to_be32(NFSERR_INVAL)
121#define nfserr_fbig cpu_to_be32(NFSERR_FBIG)
122#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
123#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
124#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
125#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
126#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
127#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
128#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
129#define nfserr_stale cpu_to_be32(NFSERR_STALE)
130#define nfserr_remote cpu_to_be32(NFSERR_REMOTE)
131#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH)
132#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE)
133#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC)
134#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE)
135#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP)
136#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL)
137#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT)
138#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE)
139#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX)
140#define nfserr_denied cpu_to_be32(NFSERR_DENIED)
141#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK)
142#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED)
143#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE)
144#define nfserr_same cpu_to_be32(NFSERR_SAME)
145#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE)
146#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID)
147#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE)
148#define nfserr_moved cpu_to_be32(NFSERR_MOVED)
149#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE)
150#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
151#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
152#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID)
153#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID)
154#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID)
155#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
156#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
157#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
158#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
159#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
160#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
161#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
162#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
163#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
164#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
165#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
166#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
167#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
168#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
169#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
170#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
171#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
172#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
173#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
174#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION)
175#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT)
176#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
177#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
178#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
179#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
180#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
181#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
182#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
183#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT)
184#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
185#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
186#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS)
187#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
188#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG)
189#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
190#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
191#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
192#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
193#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
194#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
195#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
196#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
197#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
198#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
199#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION)
200#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
201#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
202#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
203#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED)
204#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE)
205#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
206#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG)
207#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT)
208#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
209
210/* error codes for internal use */
211/* if a request fails due to kmalloc failure, it gets dropped.
212 * Client should resend eventually
213 */
214#define nfserr_dropit cpu_to_be32(30000)
215/* end-of-file indicator in readdir */
216#define nfserr_eof cpu_to_be32(30001)
217/* replay detected */
218#define nfserr_replay_me cpu_to_be32(11001)
219/* nfs41 replay detected */
220#define nfserr_replay_cache cpu_to_be32(11002)
221
222/* Check for dir entries '.' and '..' */
223#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
224
225/*
226 * Time of server startup
227 */
228extern struct timeval nfssvc_boot;
229
230#ifdef CONFIG_NFSD_V4
231
232/* before processing a COMPOUND operation, we have to check that there
233 * is enough space in the buffer for XDR encode to succeed. otherwise,
234 * we might process an operation with side effects, and be unable to
235 * tell the client that the operation succeeded.
236 *
237 * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
238 * needed to encode an "ordinary" _successful_ operation. (GETATTR,
239 * READ, READDIR, and READLINK have their own buffer checks.) if we
240 * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
241 *
242 * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
243 * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
244 * care is taken to ensure that we never fall below this level for any
245 * reason.
246 */
247#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
248#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
249
250#define NFSD_LEASE_TIME (nfs4_lease_time())
251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
252
253/*
254 * The following attributes are currently not supported by the NFSv4 server:
255 * ARCHIVE (deprecated anyway)
256 * HIDDEN (unlikely to be supported any time soon)
257 * MIMETYPE (unlikely to be supported any time soon)
258 * QUOTA_* (will be supported in a forthcoming patch)
259 * SYSTEM (unlikely to be supported any time soon)
260 * TIME_BACKUP (unlikely to be supported any time soon)
261 * TIME_CREATE (unlikely to be supported any time soon)
262 */
263#define NFSD4_SUPPORTED_ATTRS_WORD0 \
264(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
265 | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \
266 | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \
267 | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \
268 | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \
269 | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \
270 | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \
271 | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \
272 | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
273 | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
274
275#define NFSD4_SUPPORTED_ATTRS_WORD1 \
276(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
277 | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
278 | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
279 | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
280 | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
281 | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
282
283#define NFSD4_SUPPORTED_ATTRS_WORD2 0
284
285#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
286 NFSD4_SUPPORTED_ATTRS_WORD0
287
288#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
289 NFSD4_SUPPORTED_ATTRS_WORD1
290
291#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
292 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
293
294static inline u32 nfsd_suppattrs0(u32 minorversion)
295{
296 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
297 : NFSD4_SUPPORTED_ATTRS_WORD0;
298}
299
300static inline u32 nfsd_suppattrs1(u32 minorversion)
301{
302 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
303 : NFSD4_SUPPORTED_ATTRS_WORD1;
304}
305
306static inline u32 nfsd_suppattrs2(u32 minorversion)
307{
308 return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
309 : NFSD4_SUPPORTED_ATTRS_WORD2;
310}
311
312/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
313#define NFSD_WRITEONLY_ATTRS_WORD1 \
314(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
315
316/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
317#define NFSD_WRITEABLE_ATTRS_WORD0 \
318(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
319#define NFSD_WRITEABLE_ATTRS_WORD1 \
320(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
321 | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
322#define NFSD_WRITEABLE_ATTRS_WORD2 0
323
324#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
325 NFSD_WRITEABLE_ATTRS_WORD0
326/*
327 * we currently store the exclusive create verifier in the v_{a,m}time
328 * attributes so the client can't set these at create time using EXCLUSIVE4_1
329 */
330#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
331 (NFSD_WRITEABLE_ATTRS_WORD1 & \
332 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
333#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
334 NFSD_WRITEABLE_ATTRS_WORD2
335
336#endif /* CONFIG_NFSD_V4 */
337
338#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 01965b2f3a76..55c8e63af0be 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfsfh.c
3 *
4 * NFS server file handle treatment. 2 * NFS server file handle treatment.
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
@@ -9,19 +7,11 @@
9 * ... and again Southern-Winter 2001 to support export_operations 7 * ... and again Southern-Winter 2001 to support export_operations
10 */ 8 */
11 9
12#include <linux/slab.h>
13#include <linux/fs.h>
14#include <linux/unistd.h>
15#include <linux/string.h>
16#include <linux/stat.h>
17#include <linux/dcache.h>
18#include <linux/exportfs.h> 10#include <linux/exportfs.h>
19#include <linux/mount.h>
20 11
21#include <linux/sunrpc/clnt.h>
22#include <linux/sunrpc/svc.h>
23#include <linux/sunrpc/svcauth_gss.h> 12#include <linux/sunrpc/svcauth_gss.h>
24#include <linux/nfsd/nfsd.h> 13#include "nfsd.h"
14#include "vfs.h"
25#include "auth.h" 15#include "auth.h"
26 16
27#define NFSDDBG_FACILITY NFSDDBG_FH 17#define NFSDDBG_FACILITY NFSDDBG_FH
@@ -96,8 +86,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
96static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, 86static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
97 struct svc_export *exp) 87 struct svc_export *exp)
98{ 88{
89 int flags = nfsexp_flags(rqstp, exp);
90
99 /* Check if the request originated from a secure port. */ 91 /* Check if the request originated from a secure port. */
100 if (!rqstp->rq_secure && EX_SECURE(exp)) { 92 if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
101 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 93 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
102 dprintk(KERN_WARNING 94 dprintk(KERN_WARNING
103 "nfsd: request from insecure port %s!\n", 95 "nfsd: request from insecure port %s!\n",
@@ -109,6 +101,36 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
109 return nfserrno(nfsd_setuser(rqstp, exp)); 101 return nfserrno(nfsd_setuser(rqstp, exp));
110} 102}
111 103
/*
 * Extra access checks for exports flagged NFSEXP_V4ROOT.
 *
 * Returns nfs_ok immediately when the export does not carry that flag.
 * Otherwise returns nfserr_stale unless all of the following hold: the
 * request comes from a v4 client, the dentry is a directory or symlink,
 * and the dentry is the export's own root dentry.
 */
104static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
105 struct dentry *dentry, struct svc_export *exp)
106{
107 if (!(exp->ex_flags & NFSEXP_V4ROOT))
108 return nfs_ok;
109 /*
110 * v2/v3 clients have no need for the V4ROOT export--they use
111 * the mount protocol instead; also, further V4ROOT checks may be
112 * in v4-specific code, in which case v2/v3 clients could bypass
113 * them.
114 */
115 if (!nfsd_v4client(rqstp))
116 return nfserr_stale;
117 /*
118 * We're exposing only the directories and symlinks that have to be
119 * traversed on the way to real exports:
120 */
121 if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) &&
122 !S_ISLNK(dentry->d_inode->i_mode)))
123 return nfserr_stale;
124 /*
125 * A pseudoroot export gives permission to access only one
126 * single directory; the kernel has to make another upcall
127 * before granting access to anything else under it:
128 */
129 if (unlikely(dentry != exp->ex_path.dentry))
130 return nfserr_stale;
131 return nfs_ok;
132}
133
112/* 134/*
113 * Use the given filehandle to look up the corresponding export and 135 * Use the given filehandle to look up the corresponding export and
114 * dentry. On success, the results are used to set fh_export and 136 * dentry. On success, the results are used to set fh_export and
@@ -232,14 +254,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
232 goto out; 254 goto out;
233 } 255 }
234 256
235 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
236 error = nfsd_setuser_and_check_port(rqstp, exp);
237 if (error) {
238 dput(dentry);
239 goto out;
240 }
241 }
242
243 if (S_ISDIR(dentry->d_inode->i_mode) && 257 if (S_ISDIR(dentry->d_inode->i_mode) &&
244 (dentry->d_flags & DCACHE_DISCONNECTED)) { 258 (dentry->d_flags & DCACHE_DISCONNECTED)) {
245 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", 259 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -294,28 +308,32 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
294 error = nfsd_set_fh_dentry(rqstp, fhp); 308 error = nfsd_set_fh_dentry(rqstp, fhp);
295 if (error) 309 if (error)
296 goto out; 310 goto out;
297 dentry = fhp->fh_dentry;
298 exp = fhp->fh_export;
299 } else {
300 /*
301 * just rechecking permissions
302 * (e.g. nfsproc_create calls fh_verify, then nfsd_create
303 * does as well)
304 */
305 dprintk("nfsd: fh_verify - just checking\n");
306 dentry = fhp->fh_dentry;
307 exp = fhp->fh_export;
308 /*
309 * Set user creds for this exportpoint; necessary even
310 * in the "just checking" case because this may be a
311 * filehandle that was created by fh_compose, and that
312 * is about to be used in another nfsv4 compound
313 * operation.
314 */
315 error = nfsd_setuser_and_check_port(rqstp, exp);
316 if (error)
317 goto out;
318 } 311 }
312 dentry = fhp->fh_dentry;
313 exp = fhp->fh_export;
314 /*
315 * We still have to do all these permission checks, even when
316 * fh_dentry is already set:
317 * - fh_verify may be called multiple times with different
318 * "access" arguments (e.g. nfsd_proc_create calls
319 * fh_verify(...,NFSD_MAY_EXEC) first, then later (in
320 * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
321 * - in the NFSv4 case, the filehandle may have been filled
322 * in by fh_compose, and given a dentry, but further
323 * compound operations performed with that filehandle
324 * still need permissions checks. In the worst case, a
325 * mountpoint crossing may have changed the export
326 * options, and we may now need to use a different uid
327 * (for example, if different id-squashing options are in
328 * effect on the new filesystem).
329 */
330 error = check_pseudo_root(rqstp, dentry, exp);
331 if (error)
332 goto out;
333
334 error = nfsd_setuser_and_check_port(rqstp, exp);
335 if (error)
336 goto out;
319 337
320 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); 338 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
321 if (error) 339 if (error)
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 000000000000..cdfb8c6a4206
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,208 @@
1/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
2
3#ifndef _LINUX_NFSD_FH_INT_H
4#define _LINUX_NFSD_FH_INT_H
5
6#include <linux/nfsd/nfsfh.h>
7
8enum nfsd_fsid {
9 FSID_DEV = 0,
10 FSID_NUM,
11 FSID_MAJOR_MINOR,
12 FSID_ENCODE_DEV,
13 FSID_UUID4_INUM,
14 FSID_UUID8,
15 FSID_UUID16,
16 FSID_UUID16_INUM,
17};
18
19enum fsid_source {
20 FSIDSOURCE_DEV,
21 FSIDSOURCE_FSID,
22 FSIDSOURCE_UUID,
23};
24extern enum fsid_source fsid_source(struct svc_fh *fhp);
25
26
/*
 * Encode a filesystem identifier of type 'vers' into the word array
 * 'fsidv'.  Which of dev, ino, fsid and uuid are consumed depends on
 * the type; the number of bytes written per type matches key_len().
 * In the UUID cases 'uuid' is read as 16 bytes.  An unknown 'vers'
 * hits BUG().
 */
27/* This might look a little large to "inline" but in all calls except
28 * one, 'vers' is constant so most of the function disappears.
29 */
30static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
31 u32 fsid, unsigned char *uuid)
32{
33 u32 *up;
34 switch(vers) {
35 case FSID_DEV:
/* major in the upper 16 bits, minor below, then the inode number */
36 fsidv[0] = htonl((MAJOR(dev)<<16) |
37 MINOR(dev));
38 fsidv[1] = ino_t_to_u32(ino);
39 break;
40 case FSID_NUM:
/* the caller-supplied fsid is stored as given (no htonl here) */
41 fsidv[0] = fsid;
42 break;
43 case FSID_MAJOR_MINOR:
44 fsidv[0] = htonl(MAJOR(dev));
45 fsidv[1] = htonl(MINOR(dev));
46 fsidv[2] = ino_t_to_u32(ino);
47 break;
48
49 case FSID_ENCODE_DEV:
50 fsidv[0] = new_encode_dev(dev);
51 fsidv[1] = ino_t_to_u32(ino);
52 break;
53
54 case FSID_UUID4_INUM:
55 /* 4 byte fsid and inode number */
/* the four uuid words are XOR-folded into a single word */
56 up = (u32*)uuid;
57 fsidv[0] = ino_t_to_u32(ino);
58 fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
59 break;
60
61 case FSID_UUID8:
62 /* 8 byte fsid */
/* the four uuid words are XOR-folded pairwise into two words */
63 up = (u32*)uuid;
64 fsidv[0] = up[0] ^ up[2];
65 fsidv[1] = up[1] ^ up[3];
66 break;
67
68 case FSID_UUID16:
69 /* 16 byte fsid - NFSv3+ only */
70 memcpy(fsidv, uuid, 16);
71 break;
72
73 case FSID_UUID16_INUM:
74 /* 8 byte inode and 16 byte fsid */
75 *(u64*)fsidv = (u64)ino;
76 memcpy(fsidv+2, uuid, 16);
77 break;
78 default: BUG();
79 }
80}
81
/*
 * Length in bytes associated with an fsid type.  The values equal the
 * number of bytes mk_fsid() writes for the same type.  Returns 0 for an
 * unrecognised type.
 */
82static inline int key_len(int type)
83{
84 switch(type) {
85 case FSID_DEV: return 8;
86 case FSID_NUM: return 4;
87 case FSID_MAJOR_MINOR: return 12;
88 case FSID_ENCODE_DEV: return 8;
89 case FSID_UUID4_INUM: return 8;
90 case FSID_UUID8: return 8;
91 case FSID_UUID16: return 16;
92 case FSID_UUID16_INUM: return 24;
93 default: return 0;
94 }
95}
96
97/*
98 * Shorthand for dprintk()'s
99 */
100extern char * SVCFH_fmt(struct svc_fh *fhp);
101
102/*
103 * Function prototypes
104 */
105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
107__be32 fh_update(struct svc_fh *);
108void fh_put(struct svc_fh *);
109
/*
 * Copy the whole svc_fh structure from src to dst and return dst.
 * Warns (WARN_ON) if src already has a dentry attached or is marked
 * locked; the copy is performed regardless.
 */
110static __inline__ struct svc_fh *
111fh_copy(struct svc_fh *dst, struct svc_fh *src)
112{
113 WARN_ON(src->fh_dentry || src->fh_locked);
114
115 *dst = *src;
116 return dst;
117}
118
/*
 * Copy only the raw handle: fh_size plus the first src->fh_size bytes of
 * fh_base.
 * NOTE(review): nothing here checks that src->fh_size fits in
 * dst->fh_base -- presumably validated before this point; confirm.
 */
119static inline void
120fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
121{
122 dst->fh_size = src->fh_size;
123 memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
124}
125
/*
 * Zero the whole svc_fh, record maxsize in fh_maxsize, and return fhp.
 */
126static __inline__ struct svc_fh *
127fh_init(struct svc_fh *fhp, int maxsize)
128{
129 memset(fhp, 0, sizeof(*fhp));
130 fhp->fh_maxsize = maxsize;
131 return fhp;
132}
133
134#ifdef CONFIG_NFSD_V3
135/*
136 * Fill in the pre_op attr for the wcc data
 *
 * Snapshots the inode's mtime, ctime, size and i_version into the
 * fh_pre_* fields of the handle.  The snapshot is taken only once per
 * handle: if fh_pre_saved is already set nothing is overwritten.
137 */
138static inline void
139fill_pre_wcc(struct svc_fh *fhp)
140{
141 struct inode *inode;
142
143 inode = fhp->fh_dentry->d_inode;
144 if (!fhp->fh_pre_saved) {
145 fhp->fh_pre_mtime = inode->i_mtime;
146 fhp->fh_pre_ctime = inode->i_ctime;
147 fhp->fh_pre_size = inode->i_size;
148 fhp->fh_pre_change = inode->i_version;
149 fhp->fh_pre_saved = 1;
150 }
151}
152
153extern void fill_post_wcc(struct svc_fh *);
154#else
155#define fill_pre_wcc(ignored)
156#define fill_post_wcc(notused)
157#endif /* CONFIG_NFSD_V3 */
158
159
160/*
161 * Lock a file handle/inode
162 * NOTE: both fh_lock and fh_unlock are done "by hand" in
163 * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
164 * so, any changes here should be reflected there.
165 */
166
/*
 * Take the i_mutex of the inode behind fhp, using the given lockdep
 * subclass, then record pre-op attributes and mark the handle locked.
 *
 * BUGs if the handle has no dentry.  If the handle is already marked
 * locked, a warning is printed and the function returns without taking
 * the mutex again.
 */
167static inline void
168fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
169{
170 struct dentry *dentry = fhp->fh_dentry;
171 struct inode *inode;
172
173 BUG_ON(!dentry);
174
175 if (fhp->fh_locked) {
176 printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
177 dentry->d_parent->d_name.name, dentry->d_name.name);
178 return;
179 }
180
181 inode = dentry->d_inode;
182 mutex_lock_nested(&inode->i_mutex, subclass);
183 fill_pre_wcc(fhp);
184 fhp->fh_locked = 1;
185}
186
/* Lock the handle's inode using the I_MUTEX_NORMAL lockdep subclass. */
187static inline void
188fh_lock(struct svc_fh *fhp)
189{
190 fh_lock_nested(fhp, I_MUTEX_NORMAL);
191}
192
193/*
194 * Unlock a file handle/inode
 *
 * BUGs if the handle has no dentry.  When the handle is marked locked,
 * the post-op attributes are filled in before the inode's i_mutex is
 * released and fh_locked is cleared; otherwise this is a no-op.
195 */
196static inline void
197fh_unlock(struct svc_fh *fhp)
198{
199 BUG_ON(!fhp->fh_dentry);
200
201 if (fhp->fh_locked) {
202 fill_post_wcc(fhp);
203 mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
204 fhp->fh_locked = 0;
205 }
206}
207
208#endif /* _LINUX_NFSD_FH_INT_H */
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0eb9c820b7a6..a047ad6111ef 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -1,29 +1,14 @@
1/* 1/*
2 * nfsproc2.c Process version 2 NFS requests.
3 * linux/fs/nfsd/nfs2proc.c
4 *
5 * Process version 2 NFS requests. 2 * Process version 2 NFS requests.
6 * 3 *
7 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
8 */ 5 */
9 6
10#include <linux/linkage.h>
11#include <linux/time.h>
12#include <linux/errno.h>
13#include <linux/fs.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/net.h>
17#include <linux/in.h>
18#include <linux/namei.h> 7#include <linux/namei.h>
19#include <linux/unistd.h>
20#include <linux/slab.h>
21 8
22#include <linux/sunrpc/clnt.h> 9#include "cache.h"
23#include <linux/sunrpc/svc.h> 10#include "xdr.h"
24#include <linux/nfsd/nfsd.h> 11#include "vfs.h"
25#include <linux/nfsd/cache.h>
26#include <linux/nfsd/xdr.h>
27 12
28typedef struct svc_rqst svc_rqst; 13typedef struct svc_rqst svc_rqst;
29typedef struct svc_buf svc_buf; 14typedef struct svc_buf svc_buf;
@@ -758,6 +743,7 @@ nfserrno (int errno)
758 { nfserr_io, -ETXTBSY }, 743 { nfserr_io, -ETXTBSY },
759 { nfserr_notsupp, -EOPNOTSUPP }, 744 { nfserr_notsupp, -EOPNOTSUPP },
760 { nfserr_toosmall, -ETOOSMALL }, 745 { nfserr_toosmall, -ETOOSMALL },
746 { nfserr_serverfault, -ESERVERFAULT },
761 }; 747 };
762 int i; 748 int i;
763 749
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83eedd43..171699eb07c8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/nfssvc.c
3 *
4 * Central processing for nfsd. 2 * Central processing for nfsd.
5 * 3 *
6 * Authors: Olaf Kirch (okir@monad.swb.de) 4 * Authors: Olaf Kirch (okir@monad.swb.de)
@@ -8,33 +6,19 @@
8 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 6 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
9 */ 7 */
10 8
11#include <linux/module.h>
12#include <linux/sched.h> 9#include <linux/sched.h>
13#include <linux/time.h>
14#include <linux/errno.h>
15#include <linux/nfs.h>
16#include <linux/in.h>
17#include <linux/uio.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/smp.h>
21#include <linux/freezer.h> 10#include <linux/freezer.h>
22#include <linux/fs_struct.h> 11#include <linux/fs_struct.h>
23#include <linux/kthread.h>
24#include <linux/swap.h> 12#include <linux/swap.h>
25 13
26#include <linux/sunrpc/types.h>
27#include <linux/sunrpc/stats.h> 14#include <linux/sunrpc/stats.h>
28#include <linux/sunrpc/svc.h>
29#include <linux/sunrpc/svcsock.h> 15#include <linux/sunrpc/svcsock.h>
30#include <linux/sunrpc/cache.h>
31#include <linux/nfsd/nfsd.h>
32#include <linux/nfsd/stats.h>
33#include <linux/nfsd/cache.h>
34#include <linux/nfsd/syscall.h>
35#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
36#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
37#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include "nfsd.h"
20#include "cache.h"
21#include "vfs.h"
38 22
39#define NFSDDBG_FACILITY NFSDDBG_SVC 23#define NFSDDBG_FACILITY NFSDDBG_SVC
40 24
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index afd08e2c90a5..4ce005dbf3e6 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -1,20 +1,10 @@
1/* 1/*
2 * linux/fs/nfsd/nfsxdr.c
3 *
4 * XDR support for nfsd 2 * XDR support for nfsd
5 * 3 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */ 5 */
8 6
9#include <linux/types.h> 7#include "xdr.h"
10#include <linux/time.h>
11#include <linux/nfs.h>
12#include <linux/vfs.h>
13#include <linux/sunrpc/xdr.h>
14#include <linux/sunrpc/svc.h>
15#include <linux/nfsd/nfsd.h>
16#include <linux/nfsd/xdr.h>
17#include <linux/mm.h>
18#include "auth.h" 8#include "auth.h"
19 9
20#define NFSDDBG_FACILITY NFSDDBG_XDR 10#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
new file mode 100644
index 000000000000..fefeae27f25e
--- /dev/null
+++ b/fs/nfsd/state.h
@@ -0,0 +1,408 @@
1/*
2 * Copyright (c) 2001 The Regents of the University of Michigan.
3 * All rights reserved.
4 *
5 * Kendrick Smith <kmsmith@umich.edu>
6 * Andy Adamson <andros@umich.edu>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 */
34
35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H
37
38#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h"
40
41typedef struct {
42 u32 cl_boot;
43 u32 cl_id;
44} clientid_t;
45
46typedef struct {
47 u32 so_boot;
48 u32 so_stateownerid;
49 u32 so_fileid;
50} stateid_opaque_t;
51
52typedef struct {
53 u32 si_generation;
54 stateid_opaque_t si_opaque;
55} stateid_t;
56#define si_boot si_opaque.so_boot
57#define si_stateownerid si_opaque.so_stateownerid
58#define si_fileid si_opaque.so_fileid
59
60#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
61#define STATEID_VAL(s) \
62 (s)->si_boot, \
63 (s)->si_stateownerid, \
64 (s)->si_fileid, \
65 (s)->si_generation
66
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_delegation {
74 struct list_head dl_perfile;
75 struct list_head dl_perclnt;
76 struct list_head dl_recall_lru; /* delegation recalled */
77 atomic_t dl_count; /* ref count */
78 struct nfs4_client *dl_client;
79 struct nfs4_file *dl_file;
80 struct file_lock *dl_flock;
81 struct file *dl_vfs_file;
82 u32 dl_type;
83 time_t dl_time;
84/* For recall: */
85 u32 dl_ident;
86 stateid_t dl_stateid;
87 struct knfsd_fh dl_fh;
88 int dl_retries;
89};
90
91/* client delegation callback info */
92struct nfs4_cb_conn {
93 /* SETCLIENTID info */
94 struct sockaddr_storage cb_addr;
95 size_t cb_addrlen;
96 u32 cb_prog;
97 u32 cb_minorversion;
98 u32 cb_ident; /* minorversion 0 only */
99 /* RPC client info */
100 atomic_t cb_set; /* successful CB_NULL call */
101 struct rpc_clnt * cb_client;
102};
103
104/* Maximum number of slots per session. 160 is useful for long haul TCP */
105#define NFSD_MAX_SLOTS_PER_SESSION 160
106/* Maximum number of operations per session compound */
107#define NFSD_MAX_OPS_PER_COMPOUND 16
108/* Maximum session per slot cache size */
109#define NFSD_SLOT_CACHE_SIZE 1024
110/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
111#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
112#define NFSD_MAX_MEM_PER_SESSION \
113 (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
114
115struct nfsd4_slot {
116 bool sl_inuse;
117 bool sl_cachethis;
118 u16 sl_opcnt;
119 u32 sl_seqid;
120 __be32 sl_status;
121 u32 sl_datalen;
122 char sl_data[];
123};
124
125struct nfsd4_channel_attrs {
126 u32 headerpadsz;
127 u32 maxreq_sz;
128 u32 maxresp_sz;
129 u32 maxresp_cached;
130 u32 maxops;
131 u32 maxreqs;
132 u32 nr_rdma_attrs;
133 u32 rdma_attrs;
134};
135
136struct nfsd4_create_session {
137 clientid_t clientid;
138 struct nfs4_sessionid sessionid;
139 u32 seqid;
140 u32 flags;
141 struct nfsd4_channel_attrs fore_channel;
142 struct nfsd4_channel_attrs back_channel;
143 u32 callback_prog;
144 u32 uid;
145 u32 gid;
146};
147
148/* The single slot clientid cache structure */
149struct nfsd4_clid_slot {
150 u32 sl_seqid;
151 __be32 sl_status;
152 struct nfsd4_create_session sl_cr_ses;
153};
154
155struct nfsd4_session {
156 struct kref se_ref;
157 struct list_head se_hash; /* hash by sessionid */
158 struct list_head se_perclnt;
159 u32 se_flags;
160 struct nfs4_client *se_client; /* for expire_client */
161 struct nfs4_sessionid se_sessionid;
162 struct nfsd4_channel_attrs se_fchannel;
163 struct nfsd4_channel_attrs se_bchannel;
164 struct nfsd4_slot *se_slots[]; /* forward channel slots */
165};
166
/*
 * Drop a reference on the session's se_ref kref; free_session() is the
 * release callback passed to kref_put().
 */
167static inline void
168nfsd4_put_session(struct nfsd4_session *ses)
169{
170 extern void free_session(struct kref *kref);
171 kref_put(&ses->se_ref, free_session);
172}
173
/* Take an additional reference on the session's se_ref kref. */
174static inline void
175nfsd4_get_session(struct nfsd4_session *ses)
176{
177 kref_get(&ses->se_ref);
178}
179
180/* formatted contents of nfs4_sessionid */
181struct nfsd4_sessionid {
182 clientid_t clientid;
183 u32 sequence;
184 u32 reserved;
185};
186
187#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
188
189/*
190 * struct nfs4_client - one per client. Clientids live here.
191 * o Each nfs4_client is hashed by clientid.
192 *
193 * o Each nfs4_client is also hashed by name
194 * (the opaque quantity initially sent by the client to identify itself).
195 *
196 * o cl_perclient list is used to ensure no dangling stateowner references
197 * when we expire the nfs4_client
198 */
199struct nfs4_client {
200 struct list_head cl_idhash; /* hash by cl_clientid.id */
201 struct list_head cl_strhash; /* hash by cl_name */
202 struct list_head cl_openowners;
203 struct list_head cl_delegations;
204 struct list_head cl_lru; /* tail queue */
205 struct xdr_netobj cl_name; /* id generated by client */
206 char cl_recdir[HEXDIR_LEN]; /* recovery dir */
207 nfs4_verifier cl_verifier; /* generated by client */
208 time_t cl_time; /* time of last lease renewal */
209 struct sockaddr_storage cl_addr; /* client ipaddress */
210 u32 cl_flavor; /* setclientid pseudoflavor */
211 char *cl_principal; /* setclientid principal name */
212 struct svc_cred cl_cred; /* setclientid principal */
213 clientid_t cl_clientid; /* generated by server */
214 nfs4_verifier cl_confirm; /* generated by server */
215 struct nfs4_cb_conn cl_cb_conn; /* callback info */
216 atomic_t cl_count; /* ref count */
217 u32 cl_firststate; /* recovery dir creation */
218
219 /* for nfs41 */
220 struct list_head cl_sessions;
221 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
222 u32 cl_exchange_flags;
223 struct nfs4_sessionid cl_sessionid;
224
225 /* for nfs41 callbacks */
226 /* We currently support a single back channel with a single slot */
227 unsigned long cl_cb_slot_busy;
228 u32 cl_cb_seq_nr;
229 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
230 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
231 /* wait here for slots */
232};
233
234/* struct nfs4_client_reclaim
235 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
236 * upon lease reset, or from upcall to state_daemon (to read in state
237 * from non-volatile storage) upon reboot.
238 */
239struct nfs4_client_reclaim {
240 struct list_head cr_strhash; /* hash by cr_name */
241 char cr_recdir[HEXDIR_LEN]; /* recover dir */
242};
243
/* Advance the stateid by incrementing its si_generation counter. */
244static inline void
245update_stateid(stateid_t *stateid)
246{
247 stateid->si_generation++;
248}
249
250/* A reasonable value for REPLAY_ISIZE was estimated as follows:
251 * The OPEN response, typically the largest, requires
252 * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
253 * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) +
254 * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes
255 */
256
257#define NFSD4_REPLAY_ISIZE 112
258
259/*
260 * Replay buffer, where the result of the last seqid-mutating operation
261 * is cached.
 *
 * rp_ibuf is an inline buffer of NFSD4_REPLAY_ISIZE bytes.
262 */
263struct nfs4_replay {
264 __be32 rp_status;
265 unsigned int rp_buflen;
266 char *rp_buf;
/*
 * NOTE(review): as written, the next line declares an 'unsigned' member
 * named 'intrp_allocated'; it may have been meant as
 * 'unsigned int rp_allocated' -- confirm against users before renaming.
 */
267 unsigned intrp_allocated;
268 struct knfsd_fh rp_openfh;
269 char rp_ibuf[NFSD4_REPLAY_ISIZE];
270};
271
272/*
273* nfs4_stateowner can either be an open_owner, or a lock_owner
274*
275* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
276* for lock_owner
277* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
278* for lock_owner
279* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
280* struct is reaped.
281* so_perfilestate: heads the list of nfs4_stateid (either open or lock)
282* and is used to ensure no dangling nfs4_stateid references when we
283* release a stateowner.
284* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
285* close is called to reap associated byte-range locks
286* so_close_lru: (open) stateowner is placed on this list instead of being
287* reaped (when so_perfilestate is empty) to hold the last close replay.
288* reaped by laundromat thread after lease period.
289*/
290struct nfs4_stateowner {
291 struct kref so_ref;
292 struct list_head so_idhash; /* hash by so_id */
293 struct list_head so_strhash; /* hash by op_name */
294 struct list_head so_perclient;
295 struct list_head so_stateids;
296 struct list_head so_perstateid; /* for lockowners only */
297 struct list_head so_close_lru; /* tail queue */
298 time_t so_time; /* time of placement on so_close_lru */
299 int so_is_open_owner; /* 1=openowner,0=lockowner */
300 u32 so_id;
301 struct nfs4_client * so_client;
302 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next
303 * sequence id expected from the client: */
304 u32 so_seqid;
305 struct xdr_netobj so_owner; /* open owner name */
306 int so_confirmed; /* successful OPEN_CONFIRM? */
307 struct nfs4_replay so_replay;
308};
309
310/*
311* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
312* o fi_perfile list is used to search for conflicting
313* share_access, share_deny on the file.
314*/
315struct nfs4_file {
316 atomic_t fi_ref;
317 struct list_head fi_hash; /* hash by "struct inode *" */
318 struct list_head fi_stateids;
319 struct list_head fi_delegations;
320 struct inode *fi_inode;
321 u32 fi_id; /* used with stateowner->so_id
322 * for stateid_hashtbl hash */
323 bool fi_had_conflict;
324};
325
326/*
327* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
328*
329* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
330*
331* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
332* st_perfile: file_hashtbl[] entry.
333* st_perfile_state: nfs4_stateowner->so_perfilestate
334* st_perlockowner: (open stateid) list of lock nfs4_stateowners
335* st_access_bmap: used only for open stateid
336* st_deny_bmap: used only for open stateid
337* st_openstp: open stateid lock stateid was derived from
338*
339* XXX: open stateids and lock stateids have diverged sufficiently that
340* we should consider defining separate structs for the two cases.
341*/
342
343struct nfs4_stateid {
344 struct list_head st_hash;
345 struct list_head st_perfile;
346 struct list_head st_perstateowner;
347 struct list_head st_lockowners;
348 struct nfs4_stateowner * st_stateowner;
349 struct nfs4_file * st_file;
350 stateid_t st_stateid;
351 struct file * st_vfs_file;
352 unsigned long st_access_bmap;
353 unsigned long st_deny_bmap;
354 struct nfs4_stateid * st_openstp;
355};
356
357/* flags for preprocess_seqid_op() */
358#define HAS_SESSION 0x00000001
359#define CONFIRM 0x00000002
360#define OPEN_STATE 0x00000004
361#define LOCK_STATE 0x00000008
362#define RD_STATE 0x00000010
363#define WR_STATE 0x00000020
364#define CLOSE_STATE 0x00000040
365
366#define seqid_mutating_err(err) \
367 (((err) != nfserr_stale_clientid) && \
368 ((err) != nfserr_bad_seqid) && \
369 ((err) != nfserr_stale_stateid) && \
370 ((err) != nfserr_bad_stateid))
371
372struct nfsd4_compound_state;
373
374extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
375 stateid_t *stateid, int flags, struct file **filp);
376extern void nfs4_lock_state(void);
377extern void nfs4_unlock_state(void);
378extern int nfs4_in_grace(void);
379extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
380extern void put_nfs4_client(struct nfs4_client *clp);
381extern void nfs4_free_stateowner(struct kref *kref);
382extern int set_callback_cred(void);
383extern void nfsd4_probe_callback(struct nfs4_client *clp);
384extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
385extern void nfs4_put_delegation(struct nfs4_delegation *dp);
386extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
387extern void nfsd4_init_recdir(char *recdir_name);
388extern int nfsd4_recdir_load(void);
389extern void nfsd4_shutdown_recdir(void);
390extern int nfs4_client_to_reclaim(const char *name);
391extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
392extern void nfsd4_recdir_purge_old(void);
393extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
394extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
395
/*
 * Drop a reference on the stateowner's so_ref kref;
 * nfs4_free_stateowner() is the release callback passed to kref_put().
 */
396static inline void
397nfs4_put_stateowner(struct nfs4_stateowner *so)
398{
399 kref_put(&so->so_ref, nfs4_free_stateowner);
400}
401
/* Take an additional reference on the stateowner's so_ref kref. */
402static inline void
403nfs4_get_stateowner(struct nfs4_stateowner *so)
404{
405 kref_get(&so->so_ref);
406}
407
408#endif /* _NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 71944cddf680..5232d3e8fb2f 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * linux/fs/nfsd/stats.c
3 *
4 * procfs-based user access to knfsd statistics 2 * procfs-based user access to knfsd statistics
5 * 3 *
6 * /proc/net/rpc/nfsd 4 * /proc/net/rpc/nfsd
@@ -23,18 +21,13 @@
23 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> 21 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
24 */ 22 */
25 23
26#include <linux/kernel.h>
27#include <linux/time.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h> 24#include <linux/seq_file.h>
30#include <linux/stat.h>
31#include <linux/module.h> 25#include <linux/module.h>
32
33#include <linux/sunrpc/svc.h>
34#include <linux/sunrpc/stats.h> 26#include <linux/sunrpc/stats.h>
35#include <linux/nfsd/nfsd.h>
36#include <linux/nfsd/stats.h> 27#include <linux/nfsd/stats.h>
37 28
29#include "nfsd.h"
30
38struct nfsd_stats nfsdstats; 31struct nfsd_stats nfsdstats;
39struct svc_stat nfsd_svcstats = { 32struct svc_stat nfsd_svcstats = {
40 .program = &nfsd_program, 33 .program = &nfsd_program,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a293f0273263..c194793b642b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1,7 +1,5 @@
1#define MSNFS /* HACK HACK */ 1#define MSNFS /* HACK HACK */
2/* 2/*
3 * linux/fs/nfsd/vfs.c
4 *
5 * File operations used by nfsd. Some of these have been ripped from 3 * File operations used by nfsd. Some of these have been ripped from
6 * other parts of the kernel because they weren't exported, others 4 * other parts of the kernel because they weren't exported, others
7 * are partial duplicates with added or changed functionality. 5 * are partial duplicates with added or changed functionality.
@@ -16,48 +14,31 @@
16 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 14 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
17 */ 15 */
18 16
19#include <linux/string.h>
20#include <linux/time.h>
21#include <linux/errno.h>
22#include <linux/fs.h> 17#include <linux/fs.h>
23#include <linux/file.h> 18#include <linux/file.h>
24#include <linux/mount.h>
25#include <linux/major.h>
26#include <linux/splice.h> 19#include <linux/splice.h>
27#include <linux/proc_fs.h>
28#include <linux/stat.h>
29#include <linux/fcntl.h> 20#include <linux/fcntl.h>
30#include <linux/net.h>
31#include <linux/unistd.h>
32#include <linux/slab.h>
33#include <linux/pagemap.h>
34#include <linux/in.h>
35#include <linux/module.h>
36#include <linux/namei.h> 21#include <linux/namei.h>
37#include <linux/vfs.h>
38#include <linux/delay.h> 22#include <linux/delay.h>
39#include <linux/sunrpc/svc.h>
40#include <linux/nfsd/nfsd.h>
41#ifdef CONFIG_NFSD_V3
42#include <linux/nfs3.h>
43#include <linux/nfsd/xdr3.h>
44#endif /* CONFIG_NFSD_V3 */
45#include <linux/nfsd/nfsfh.h>
46#include <linux/quotaops.h> 23#include <linux/quotaops.h>
47#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
48#include <linux/posix_acl.h>
49#include <linux/posix_acl_xattr.h> 25#include <linux/posix_acl_xattr.h>
50#include <linux/xattr.h> 26#include <linux/xattr.h>
27#include <linux/jhash.h>
28#include <linux/ima.h>
29#include <asm/uaccess.h>
30
31#ifdef CONFIG_NFSD_V3
32#include "xdr3.h"
33#endif /* CONFIG_NFSD_V3 */
34
51#ifdef CONFIG_NFSD_V4 35#ifdef CONFIG_NFSD_V4
52#include <linux/nfs4.h>
53#include <linux/nfs4_acl.h> 36#include <linux/nfs4_acl.h>
54#include <linux/nfsd_idmap.h> 37#include <linux/nfsd_idmap.h>
55#include <linux/security.h>
56#endif /* CONFIG_NFSD_V4 */ 38#endif /* CONFIG_NFSD_V4 */
57#include <linux/jhash.h>
58#include <linux/ima.h>
59 39
60#include <asm/uaccess.h> 40#include "nfsd.h"
41#include "vfs.h"
61 42
62#define NFSDDBG_FACILITY NFSDDBG_FILEOP 43#define NFSDDBG_FACILITY NFSDDBG_FILEOP
63 44
@@ -89,12 +70,6 @@ struct raparm_hbucket {
89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 70#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
90static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 71static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
91 72
92static inline int
93nfsd_v4client(struct svc_rqst *rq)
94{
95 return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
96}
97
98/* 73/*
99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 74 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
100 * a mount point. 75 * a mount point.
@@ -116,8 +91,16 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
116 91
117 exp2 = rqst_exp_get_by_name(rqstp, &path); 92 exp2 = rqst_exp_get_by_name(rqstp, &path);
118 if (IS_ERR(exp2)) { 93 if (IS_ERR(exp2)) {
119 if (PTR_ERR(exp2) != -ENOENT) 94 err = PTR_ERR(exp2);
120 err = PTR_ERR(exp2); 95 /*
96 * We normally allow NFS clients to continue
97 * "underneath" a mountpoint that is not exported.
98 * The exception is V4ROOT, where no traversal is ever
99 * allowed without an explicit export of the new
100 * directory.
101 */
102 if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
103 err = 0;
121 path_put(&path); 104 path_put(&path);
122 goto out; 105 goto out;
123 } 106 }
@@ -141,6 +124,53 @@ out:
141 return err; 124 return err;
142} 125}
143 126
127static void follow_to_parent(struct path *path)
128{
129 struct dentry *dp;
130
131 while (path->dentry == path->mnt->mnt_root && follow_up(path))
132 ;
133 dp = dget_parent(path->dentry);
134 dput(path->dentry);
135 path->dentry = dp;
136}
137
138static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
139{
140 struct svc_export *exp2;
141 struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
142 .dentry = dget(dparent)};
143
144 follow_to_parent(&path);
145
146 exp2 = rqst_exp_parent(rqstp, &path);
147 if (PTR_ERR(exp2) == -ENOENT) {
148 *dentryp = dget(dparent);
149 } else if (IS_ERR(exp2)) {
150 path_put(&path);
151 return PTR_ERR(exp2);
152 } else {
153 *dentryp = dget(path.dentry);
154 exp_put(*exp);
155 *exp = exp2;
156 }
157 path_put(&path);
158 return 0;
159}
160
161/*
162 * For nfsd purposes, we treat V4ROOT exports as though there was an
163 * export at *every* directory.
164 */
165int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
166{
167 if (d_mountpoint(dentry))
168 return 1;
169 if (!(exp->ex_flags & NFSEXP_V4ROOT))
170 return 0;
171 return dentry->d_inode != NULL;
172}
173
144__be32 174__be32
145nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, 175nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
146 const char *name, unsigned int len, 176 const char *name, unsigned int len,
@@ -169,35 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
169 dentry = dget(dparent); 199 dentry = dget(dparent);
170 else if (dparent != exp->ex_path.dentry) 200 else if (dparent != exp->ex_path.dentry)
171 dentry = dget_parent(dparent); 201 dentry = dget_parent(dparent);
172 else if (!EX_NOHIDE(exp)) 202 else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
173 dentry = dget(dparent); /* .. == . just like at / */ 203 dentry = dget(dparent); /* .. == . just like at / */
174 else { 204 else {
175 /* checking mountpoint crossing is very different when stepping up */ 205 /* checking mountpoint crossing is very different when stepping up */
176 struct svc_export *exp2 = NULL; 206 host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
177 struct dentry *dp; 207 if (host_err)
178 struct path path = {.mnt = mntget(exp->ex_path.mnt),
179 .dentry = dget(dparent)};
180
181 while (path.dentry == path.mnt->mnt_root &&
182 follow_up(&path))
183 ;
184 dp = dget_parent(path.dentry);
185 dput(path.dentry);
186 path.dentry = dp;
187
188 exp2 = rqst_exp_parent(rqstp, &path);
189 if (PTR_ERR(exp2) == -ENOENT) {
190 dentry = dget(dparent);
191 } else if (IS_ERR(exp2)) {
192 host_err = PTR_ERR(exp2);
193 path_put(&path);
194 goto out_nfserr; 208 goto out_nfserr;
195 } else {
196 dentry = dget(path.dentry);
197 exp_put(exp);
198 exp = exp2;
199 }
200 path_put(&path);
201 } 209 }
202 } else { 210 } else {
203 fh_lock(fhp); 211 fh_lock(fhp);
@@ -208,7 +216,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
208 /* 216 /*
209 * check if we have crossed a mount point ... 217 * check if we have crossed a mount point ...
210 */ 218 */
211 if (d_mountpoint(dentry)) { 219 if (nfsd_mountpoint(dentry, exp)) {
212 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 220 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
213 dput(dentry); 221 dput(dentry);
214 goto out_nfserr; 222 goto out_nfserr;
@@ -744,8 +752,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
744 flags, current_cred()); 752 flags, current_cred());
745 if (IS_ERR(*filp)) 753 if (IS_ERR(*filp))
746 host_err = PTR_ERR(*filp); 754 host_err = PTR_ERR(*filp);
747 else
748 ima_counts_get(*filp);
749out_nfserr: 755out_nfserr:
750 err = nfserrno(host_err); 756 err = nfserrno(host_err);
751out: 757out:
@@ -774,12 +780,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
774 int (*fsync) (struct file *, struct dentry *, int); 780 int (*fsync) (struct file *, struct dentry *, int);
775 int err; 781 int err;
776 782
777 err = filemap_fdatawrite(inode->i_mapping); 783 err = filemap_write_and_wait(inode->i_mapping);
778 if (err == 0 && fop && (fsync = fop->fsync)) 784 if (err == 0 && fop && (fsync = fop->fsync))
779 err = fsync(filp, dp, 0); 785 err = fsync(filp, dp, 0);
780 if (err == 0)
781 err = filemap_fdatawait(inode->i_mapping);
782
783 return err; 786 return err;
784} 787}
785 788
@@ -2124,8 +2127,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2124 */ 2127 */
2125 path.mnt = exp->ex_path.mnt; 2128 path.mnt = exp->ex_path.mnt;
2126 path.dentry = dentry; 2129 path.dentry = dentry;
2127 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC), 2130 err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
2128 IMA_COUNT_LEAVE);
2129nfsd_out: 2131nfsd_out:
2130 return err? nfserrno(err) : 0; 2132 return err? nfserrno(err) : 0;
2131} 2133}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
new file mode 100644
index 000000000000..4b1de0a9ea75
--- /dev/null
+++ b/fs/nfsd/vfs.h
@@ -0,0 +1,101 @@
1/*
2 * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
3 */
4
5#ifndef LINUX_NFSD_VFS_H
6#define LINUX_NFSD_VFS_H
7
8#include "nfsfh.h"
9
10/*
11 * Flags for nfsd_permission
12 */
13#define NFSD_MAY_NOP 0
14#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
15#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
16#define NFSD_MAY_READ 4 /* == MAY_READ */
17#define NFSD_MAY_SATTR 8
18#define NFSD_MAY_TRUNC 16
19#define NFSD_MAY_LOCK 32
20#define NFSD_MAY_OWNER_OVERRIDE 64
21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
23
24#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
25#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
26
27/*
28 * Callback function for readdir
29 */
30typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
31
32/* nfsd/vfs.c */
33int fh_lock_parent(struct svc_fh *, struct dentry *);
34int nfsd_racache_init(int);
35void nfsd_racache_shutdown(void);
36int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
37 struct svc_export **expp);
38__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
39 const char *, unsigned int, struct svc_fh *);
40__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
41 const char *, unsigned int,
42 struct svc_export **, struct dentry **);
43__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
44 struct iattr *, int, time_t);
45int nfsd_mountpoint(struct dentry *, struct svc_export *);
46#ifdef CONFIG_NFSD_V4
47__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
48 struct nfs4_acl *);
49int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
50#endif /* CONFIG_NFSD_V4 */
51__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
52 char *name, int len, struct iattr *attrs,
53 int type, dev_t rdev, struct svc_fh *res);
54#ifdef CONFIG_NFSD_V3
55__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
56__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
57 char *name, int len, struct iattr *attrs,
58 struct svc_fh *res, int createmode,
59 u32 *verifier, int *truncp, int *created);
60__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
61 loff_t, unsigned long);
62#endif /* CONFIG_NFSD_V3 */
63__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int,
64 int, struct file **);
65void nfsd_close(struct file *);
66__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
67 loff_t, struct kvec *, int, unsigned long *);
68__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
69 loff_t, struct kvec *,int, unsigned long *, int *);
70__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
71 char *, int *);
72__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
73 char *name, int len, char *path, int plen,
74 struct svc_fh *res, struct iattr *);
75__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
76 char *, int, struct svc_fh *);
77__be32 nfsd_rename(struct svc_rqst *,
78 struct svc_fh *, char *, int,
79 struct svc_fh *, char *, int);
80__be32 nfsd_remove(struct svc_rqst *,
81 struct svc_fh *, char *, int);
82__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
83 char *name, int len);
84int nfsd_truncate(struct svc_rqst *, struct svc_fh *,
85 unsigned long size);
86__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
87 loff_t *, struct readdir_cd *, filldir_t);
88__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
89 struct kstatfs *, int access);
90
91int nfsd_notify_change(struct inode *, struct iattr *);
92__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
93 struct dentry *, int);
94int nfsd_sync_dir(struct dentry *dp);
95
96#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
97struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
98int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
99#endif
100
101#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
new file mode 100644
index 000000000000..53b1863dd8f6
--- /dev/null
+++ b/fs/nfsd/xdr.h
@@ -0,0 +1,173 @@
1/* XDR types for nfsd. This is mainly a typing exercise. */
2
3#ifndef LINUX_NFSD_H
4#define LINUX_NFSD_H
5
6#include <linux/vfs.h>
7#include "nfsd.h"
8#include "nfsfh.h"
9
10struct nfsd_fhandle {
11 struct svc_fh fh;
12};
13
14struct nfsd_sattrargs {
15 struct svc_fh fh;
16 struct iattr attrs;
17};
18
19struct nfsd_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd_readargs {
26 struct svc_fh fh;
27 __u32 offset;
28 __u32 count;
29 int vlen;
30};
31
32struct nfsd_writeargs {
33 svc_fh fh;
34 __u32 offset;
35 int len;
36 int vlen;
37};
38
39struct nfsd_createargs {
40 struct svc_fh fh;
41 char * name;
42 unsigned int len;
43 struct iattr attrs;
44};
45
46struct nfsd_renameargs {
47 struct svc_fh ffh;
48 char * fname;
49 unsigned int flen;
50 struct svc_fh tfh;
51 char * tname;
52 unsigned int tlen;
53};
54
55struct nfsd_readlinkargs {
56 struct svc_fh fh;
57 char * buffer;
58};
59
60struct nfsd_linkargs {
61 struct svc_fh ffh;
62 struct svc_fh tfh;
63 char * tname;
64 unsigned int tlen;
65};
66
67struct nfsd_symlinkargs {
68 struct svc_fh ffh;
69 char * fname;
70 unsigned int flen;
71 char * tname;
72 unsigned int tlen;
73 struct iattr attrs;
74};
75
76struct nfsd_readdirargs {
77 struct svc_fh fh;
78 __u32 cookie;
79 __u32 count;
80 __be32 * buffer;
81};
82
83struct nfsd_attrstat {
84 struct svc_fh fh;
85 struct kstat stat;
86};
87
88struct nfsd_diropres {
89 struct svc_fh fh;
90 struct kstat stat;
91};
92
93struct nfsd_readlinkres {
94 int len;
95};
96
97struct nfsd_readres {
98 struct svc_fh fh;
99 unsigned long count;
100 struct kstat stat;
101};
102
103struct nfsd_readdirres {
104 int count;
105
106 struct readdir_cd common;
107 __be32 * buffer;
108 int buflen;
109 __be32 * offset;
110};
111
112struct nfsd_statfsres {
113 struct kstatfs stats;
114};
115
116/*
117 * Storage requirements for XDR arguments and results.
118 */
119union nfsd_xdrstore {
120 struct nfsd_sattrargs sattr;
121 struct nfsd_diropargs dirop;
122 struct nfsd_readargs read;
123 struct nfsd_writeargs write;
124 struct nfsd_createargs create;
125 struct nfsd_renameargs rename;
126 struct nfsd_linkargs link;
127 struct nfsd_symlinkargs symlink;
128 struct nfsd_readdirargs readdir;
129};
130
131#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
132
133
134int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
135int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
136int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
137 struct nfsd_sattrargs *);
138int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
139 struct nfsd_diropargs *);
140int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
141 struct nfsd_readargs *);
142int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
143 struct nfsd_writeargs *);
144int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
145 struct nfsd_createargs *);
146int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
147 struct nfsd_renameargs *);
148int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
149 struct nfsd_readlinkargs *);
150int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
151 struct nfsd_linkargs *);
152int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
153 struct nfsd_symlinkargs *);
154int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
155 struct nfsd_readdirargs *);
156int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
157int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
158int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
159int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
160int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
161int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
162int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
163
164int nfssvc_encode_entry(void *, const char *name,
165 int namlen, loff_t offset, u64 ino, unsigned int);
166
167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
168
169/* Helper functions for NFSv2 ACL code */
170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
172
173#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
new file mode 100644
index 000000000000..7df980eb0562
--- /dev/null
+++ b/fs/nfsd/xdr3.h
@@ -0,0 +1,344 @@
1/*
2 * XDR types for NFSv3 in nfsd.
3 *
4 * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
5 */
6
7#ifndef _LINUX_NFSD_XDR3_H
8#define _LINUX_NFSD_XDR3_H
9
10#include "xdr.h"
11
12struct nfsd3_sattrargs {
13 struct svc_fh fh;
14 struct iattr attrs;
15 int check_guard;
16 time_t guardtime;
17};
18
19struct nfsd3_diropargs {
20 struct svc_fh fh;
21 char * name;
22 unsigned int len;
23};
24
25struct nfsd3_accessargs {
26 struct svc_fh fh;
27 unsigned int access;
28};
29
30struct nfsd3_readargs {
31 struct svc_fh fh;
32 __u64 offset;
33 __u32 count;
34 int vlen;
35};
36
37struct nfsd3_writeargs {
38 svc_fh fh;
39 __u64 offset;
40 __u32 count;
41 int stable;
42 __u32 len;
43 int vlen;
44};
45
46struct nfsd3_createargs {
47 struct svc_fh fh;
48 char * name;
49 unsigned int len;
50 int createmode;
51 struct iattr attrs;
52 __be32 * verf;
53};
54
55struct nfsd3_mknodargs {
56 struct svc_fh fh;
57 char * name;
58 unsigned int len;
59 __u32 ftype;
60 __u32 major, minor;
61 struct iattr attrs;
62};
63
64struct nfsd3_renameargs {
65 struct svc_fh ffh;
66 char * fname;
67 unsigned int flen;
68 struct svc_fh tfh;
69 char * tname;
70 unsigned int tlen;
71};
72
73struct nfsd3_readlinkargs {
74 struct svc_fh fh;
75 char * buffer;
76};
77
78struct nfsd3_linkargs {
79 struct svc_fh ffh;
80 struct svc_fh tfh;
81 char * tname;
82 unsigned int tlen;
83};
84
85struct nfsd3_symlinkargs {
86 struct svc_fh ffh;
87 char * fname;
88 unsigned int flen;
89 char * tname;
90 unsigned int tlen;
91 struct iattr attrs;
92};
93
94struct nfsd3_readdirargs {
95 struct svc_fh fh;
96 __u64 cookie;
97 __u32 dircount;
98 __u32 count;
99 __be32 * verf;
100 __be32 * buffer;
101};
102
103struct nfsd3_commitargs {
104 struct svc_fh fh;
105 __u64 offset;
106 __u32 count;
107};
108
109struct nfsd3_getaclargs {
110 struct svc_fh fh;
111 int mask;
112};
113
114struct posix_acl;
115struct nfsd3_setaclargs {
116 struct svc_fh fh;
117 int mask;
118 struct posix_acl *acl_access;
119 struct posix_acl *acl_default;
120};
121
122struct nfsd3_attrstat {
123 __be32 status;
124 struct svc_fh fh;
125 struct kstat stat;
126};
127
128/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
129struct nfsd3_diropres {
130 __be32 status;
131 struct svc_fh dirfh;
132 struct svc_fh fh;
133};
134
135struct nfsd3_accessres {
136 __be32 status;
137 struct svc_fh fh;
138 __u32 access;
139};
140
141struct nfsd3_readlinkres {
142 __be32 status;
143 struct svc_fh fh;
144 __u32 len;
145};
146
147struct nfsd3_readres {
148 __be32 status;
149 struct svc_fh fh;
150 unsigned long count;
151 int eof;
152};
153
154struct nfsd3_writeres {
155 __be32 status;
156 struct svc_fh fh;
157 unsigned long count;
158 int committed;
159};
160
161struct nfsd3_renameres {
162 __be32 status;
163 struct svc_fh ffh;
164 struct svc_fh tfh;
165};
166
167struct nfsd3_linkres {
168 __be32 status;
169 struct svc_fh tfh;
170 struct svc_fh fh;
171};
172
173struct nfsd3_readdirres {
174 __be32 status;
175 struct svc_fh fh;
176 int count;
177 __be32 verf[2];
178
179 struct readdir_cd common;
180 __be32 * buffer;
181 int buflen;
182 __be32 * offset;
183 __be32 * offset1;
184 struct svc_rqst * rqstp;
185
186};
187
188struct nfsd3_fsstatres {
189 __be32 status;
190 struct kstatfs stats;
191 __u32 invarsec;
192};
193
194struct nfsd3_fsinfores {
195 __be32 status;
196 __u32 f_rtmax;
197 __u32 f_rtpref;
198 __u32 f_rtmult;
199 __u32 f_wtmax;
200 __u32 f_wtpref;
201 __u32 f_wtmult;
202 __u32 f_dtpref;
203 __u64 f_maxfilesize;
204 __u32 f_properties;
205};
206
207struct nfsd3_pathconfres {
208 __be32 status;
209 __u32 p_link_max;
210 __u32 p_name_max;
211 __u32 p_no_trunc;
212 __u32 p_chown_restricted;
213 __u32 p_case_insensitive;
214 __u32 p_case_preserving;
215};
216
217struct nfsd3_commitres {
218 __be32 status;
219 struct svc_fh fh;
220};
221
222struct nfsd3_getaclres {
223 __be32 status;
224 struct svc_fh fh;
225 int mask;
226 struct posix_acl *acl_access;
227 struct posix_acl *acl_default;
228};
229
230/* dummy type for release */
231struct nfsd3_fhandle_pair {
232 __u32 dummy;
233 struct svc_fh fh1;
234 struct svc_fh fh2;
235};
236
237/*
238 * Storage requirements for XDR arguments and results.
239 */
240union nfsd3_xdrstore {
241 struct nfsd3_sattrargs sattrargs;
242 struct nfsd3_diropargs diropargs;
243 struct nfsd3_readargs readargs;
244 struct nfsd3_writeargs writeargs;
245 struct nfsd3_createargs createargs;
246 struct nfsd3_renameargs renameargs;
247 struct nfsd3_linkargs linkargs;
248 struct nfsd3_symlinkargs symlinkargs;
249 struct nfsd3_readdirargs readdirargs;
250 struct nfsd3_diropres diropres;
251 struct nfsd3_accessres accessres;
252 struct nfsd3_readlinkres readlinkres;
253 struct nfsd3_readres readres;
254 struct nfsd3_writeres writeres;
255 struct nfsd3_renameres renameres;
256 struct nfsd3_linkres linkres;
257 struct nfsd3_readdirres readdirres;
258 struct nfsd3_fsstatres fsstatres;
259 struct nfsd3_fsinfores fsinfores;
260 struct nfsd3_pathconfres pathconfres;
261 struct nfsd3_commitres commitres;
262 struct nfsd3_getaclres getaclres;
263};
264
265#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
266
267int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
268int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
269 struct nfsd3_sattrargs *);
270int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
271 struct nfsd3_diropargs *);
272int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
273 struct nfsd3_accessargs *);
274int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
275 struct nfsd3_readargs *);
276int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
277 struct nfsd3_writeargs *);
278int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
279 struct nfsd3_createargs *);
280int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
281 struct nfsd3_createargs *);
282int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
283 struct nfsd3_mknodargs *);
284int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
285 struct nfsd3_renameargs *);
286int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
287 struct nfsd3_readlinkargs *);
288int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
289 struct nfsd3_linkargs *);
290int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
291 struct nfsd3_symlinkargs *);
292int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
293 struct nfsd3_readdirargs *);
294int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
295 struct nfsd3_readdirargs *);
296int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
297 struct nfsd3_commitargs *);
298int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
299int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
300 struct nfsd3_attrstat *);
301int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
302 struct nfsd3_attrstat *);
303int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
304 struct nfsd3_diropres *);
305int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
306 struct nfsd3_accessres *);
307int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
308 struct nfsd3_readlinkres *);
309int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
310int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
311int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
312 struct nfsd3_diropres *);
313int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
314 struct nfsd3_renameres *);
315int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
316 struct nfsd3_linkres *);
317int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
318 struct nfsd3_readdirres *);
319int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
320 struct nfsd3_fsstatres *);
321int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
322 struct nfsd3_fsinfores *);
323int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
324 struct nfsd3_pathconfres *);
325int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
326 struct nfsd3_commitres *);
327
328int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
329 struct nfsd3_attrstat *);
330int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
331 struct nfsd3_fhandle_pair *);
332int nfs3svc_encode_entry(void *, const char *name,
333 int namlen, loff_t offset, u64 ino,
334 unsigned int);
335int nfs3svc_encode_entry_plus(void *, const char *name,
336 int namlen, loff_t offset, u64 ino,
337 unsigned int);
338/* Helper functions for NFSv3 ACL code */
339__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
340 struct svc_fh *fhp);
341__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
342
343
344#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
new file mode 100644
index 000000000000..efa337739534
--- /dev/null
+++ b/fs/nfsd/xdr4.h
@@ -0,0 +1,562 @@
1/*
2 * Server-side types for NFSv4.
3 *
4 * Copyright (c) 2002 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Kendrick Smith <kmsmith@umich.edu>
8 * Andy Adamson <andros@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#ifndef _LINUX_NFSD_XDR4_H
38#define _LINUX_NFSD_XDR4_H
39
40#include "state.h"
41#include "nfsd.h"
42
43#define NFSD4_MAX_TAGLEN 128
44#define XDR_LEN(n) (((n) + 3) & ~3)
45
46struct nfsd4_compound_state {
47 struct svc_fh current_fh;
48 struct svc_fh save_fh;
49 struct nfs4_stateowner *replay_owner;
50 /* For sessions DRC */
51 struct nfsd4_session *session;
52 struct nfsd4_slot *slot;
53 __be32 *datap;
54 size_t iovlen;
55 u32 minorversion;
56 u32 status;
57};
58
59static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
60{
61 return cs->slot != NULL;
62}
63
64struct nfsd4_change_info {
65 u32 atomic;
66 bool change_supported;
67 u32 before_ctime_sec;
68 u32 before_ctime_nsec;
69 u64 before_change;
70 u32 after_ctime_sec;
71 u32 after_ctime_nsec;
72 u64 after_change;
73};
74
75struct nfsd4_access {
76 u32 ac_req_access; /* request */
77 u32 ac_supported; /* response */
78 u32 ac_resp_access; /* response */
79};
80
81struct nfsd4_close {
82 u32 cl_seqid; /* request */
83 stateid_t cl_stateid; /* request+response */
84 struct nfs4_stateowner * cl_stateowner; /* response */
85};
86
87struct nfsd4_commit {
88 u64 co_offset; /* request */
89 u32 co_count; /* request */
90 nfs4_verifier co_verf; /* response */
91};
92
93struct nfsd4_create {
94 u32 cr_namelen; /* request */
95 char * cr_name; /* request */
96 u32 cr_type; /* request */
97 union { /* request */
98 struct {
99 u32 namelen;
100 char *name;
101 } link; /* NF4LNK */
102 struct {
103 u32 specdata1;
104 u32 specdata2;
105 } dev; /* NF4BLK, NF4CHR */
106 } u;
107 u32 cr_bmval[3]; /* request */
108 struct iattr cr_iattr; /* request */
109 struct nfsd4_change_info cr_cinfo; /* response */
110 struct nfs4_acl *cr_acl;
111};
112#define cr_linklen u.link.namelen
113#define cr_linkname u.link.name
114#define cr_specdata1 u.dev.specdata1
115#define cr_specdata2 u.dev.specdata2
116
117struct nfsd4_delegreturn {
118 stateid_t dr_stateid;
119};
120
121struct nfsd4_getattr {
122 u32 ga_bmval[3]; /* request */
123 struct svc_fh *ga_fhp; /* response */
124};
125
126struct nfsd4_link {
127 u32 li_namelen; /* request */
128 char * li_name; /* request */
129 struct nfsd4_change_info li_cinfo; /* response */
130};
131
132struct nfsd4_lock_denied {
133 clientid_t ld_clientid;
134 struct nfs4_stateowner *ld_sop;
135 u64 ld_start;
136 u64 ld_length;
137 u32 ld_type;
138};
139
140struct nfsd4_lock {
141 /* request */
142 u32 lk_type;
143 u32 lk_reclaim; /* boolean */
144 u64 lk_offset;
145 u64 lk_length;
146 u32 lk_is_new;
147 union {
148 struct {
149 u32 open_seqid;
150 stateid_t open_stateid;
151 u32 lock_seqid;
152 clientid_t clientid;
153 struct xdr_netobj owner;
154 } new;
155 struct {
156 stateid_t lock_stateid;
157 u32 lock_seqid;
158 } old;
159 } v;
160
161 /* response */
162 union {
163 struct {
164 stateid_t stateid;
165 } ok;
166 struct nfsd4_lock_denied denied;
167 } u;
168 /* The lk_replay_owner is the open owner in the open_to_lock_owner
169 * case and the lock owner otherwise: */
170 struct nfs4_stateowner *lk_replay_owner;
171};
172#define lk_new_open_seqid v.new.open_seqid
173#define lk_new_open_stateid v.new.open_stateid
174#define lk_new_lock_seqid v.new.lock_seqid
175#define lk_new_clientid v.new.clientid
176#define lk_new_owner v.new.owner
177#define lk_old_lock_stateid v.old.lock_stateid
178#define lk_old_lock_seqid v.old.lock_seqid
179
180#define lk_rflags u.ok.rflags
181#define lk_resp_stateid u.ok.stateid
182#define lk_denied u.denied
183
184
185struct nfsd4_lockt {
186 u32 lt_type;
187 clientid_t lt_clientid;
188 struct xdr_netobj lt_owner;
189 u64 lt_offset;
190 u64 lt_length;
191 struct nfs4_stateowner * lt_stateowner;
192 struct nfsd4_lock_denied lt_denied;
193};
194
195
196struct nfsd4_locku {
197 u32 lu_type;
198 u32 lu_seqid;
199 stateid_t lu_stateid;
200 u64 lu_offset;
201 u64 lu_length;
202 struct nfs4_stateowner *lu_stateowner;
203};
204
205
206struct nfsd4_lookup {
207 u32 lo_len; /* request */
208 char * lo_name; /* request */
209};
210
211struct nfsd4_putfh {
212 u32 pf_fhlen; /* request */
213 char *pf_fhval; /* request */
214};
215
216struct nfsd4_open {
217 u32 op_claim_type; /* request */
218 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
219 u32 op_delegate_type; /* request - CLAIM_PREV only */
220 stateid_t op_delegate_stateid; /* request - response */
221 u32 op_create; /* request */
222 u32 op_createmode; /* request */
223 u32 op_bmval[3]; /* request */
224 struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
225 nfs4_verifier verf; /* EXCLUSIVE4 */
226 clientid_t op_clientid; /* request */
227 struct xdr_netobj op_owner; /* request */
228 u32 op_seqid; /* request */
229 u32 op_share_access; /* request */
230 u32 op_share_deny; /* request */
231 stateid_t op_stateid; /* response */
232 u32 op_recall; /* recall */
233 struct nfsd4_change_info op_cinfo; /* response */
234 u32 op_rflags; /* response */
235 int op_truncate; /* used during processing */
236 struct nfs4_stateowner *op_stateowner; /* used during processing */
237 struct nfs4_acl *op_acl;
238};
239#define op_iattr iattr
240#define op_verf verf
241
242struct nfsd4_open_confirm {
243 stateid_t oc_req_stateid /* request */;
244 u32 oc_seqid /* request */;
245 stateid_t oc_resp_stateid /* response */;
246 struct nfs4_stateowner * oc_stateowner; /* response */
247};
248
249struct nfsd4_open_downgrade {
250 stateid_t od_stateid;
251 u32 od_seqid;
252 u32 od_share_access;
253 u32 od_share_deny;
254 struct nfs4_stateowner *od_stateowner;
255};
256
257
258struct nfsd4_read {
259 stateid_t rd_stateid; /* request */
260 u64 rd_offset; /* request */
261 u32 rd_length; /* request */
262 int rd_vlen;
263 struct file *rd_filp;
264
265 struct svc_rqst *rd_rqstp; /* response */
266 struct svc_fh * rd_fhp; /* response */
267};
268
269struct nfsd4_readdir {
270 u64 rd_cookie; /* request */
271 nfs4_verifier rd_verf; /* request */
272 u32 rd_dircount; /* request */
273 u32 rd_maxcount; /* request */
274 u32 rd_bmval[3]; /* request */
275 struct svc_rqst *rd_rqstp; /* response */
276 struct svc_fh * rd_fhp; /* response */
277
278 struct readdir_cd common;
279 __be32 * buffer;
280 int buflen;
281 __be32 * offset;
282};
283
284struct nfsd4_release_lockowner {
285 clientid_t rl_clientid;
286 struct xdr_netobj rl_owner;
287};
/*
 * Fields of a readlink operation: pointers to the RPC request and to a file
 * handle, both tagged as request-side.
 */
288struct nfsd4_readlink {
289 struct svc_rqst *rl_rqstp; /* request */
290 struct svc_fh * rl_fhp; /* request */
291};
292
/*
 * Arguments and results of a remove operation: the request names the entry
 * (length plus pointer); the response carries a change-info record.
 */
293struct nfsd4_remove {
294 u32 rm_namelen; /* request */
295 char * rm_name; /* request */
296 struct nfsd4_change_info rm_cinfo; /* response */
297};
298
/*
 * Arguments and results of a rename operation: the request supplies two
 * names (rn_sname/rn_tname with their lengths); the response carries one
 * change-info record for each (rn_sinfo, rn_tinfo).
 * NOTE(review): the s/t prefixes presumably mean source/target -- confirm.
 */
299struct nfsd4_rename {
300 u32 rn_snamelen; /* request */
301 char * rn_sname; /* request */
302 u32 rn_tnamelen; /* request */
303 char * rn_tname; /* request */
304 struct nfsd4_change_info rn_sinfo; /* response */
305 struct nfsd4_change_info rn_tinfo; /* response */
306};
307
/*
 * Arguments and results of a secinfo operation: the request supplies a name
 * (length plus pointer); the response refers to a struct svc_export.
 */
308struct nfsd4_secinfo {
309 u32 si_namelen; /* request */
310 char *si_name; /* request */
311 struct svc_export *si_exp; /* response */
312};
313
/*
 * Arguments of a setattr operation: a stateid, a three-word attribute
 * bitmap and a struct iattr, all tagged as request fields, plus an untagged
 * ACL pointer.
 */
314struct nfsd4_setattr {
315 stateid_t sa_stateid; /* request */
316 u32 sa_bmval[3]; /* request */
317 struct iattr sa_iattr; /* request */
318 struct nfs4_acl *sa_acl;
319};
320
/*
 * Arguments and results of a setclientid operation.  The request supplies a
 * verifier, a name and callback parameters (program number, netid string,
 * address string, ident); the response returns a client id and a
 * confirmation verifier.
 */
321struct nfsd4_setclientid {
322 nfs4_verifier se_verf; /* request */
323 u32 se_namelen; /* request */
324 char * se_name; /* request */
325 u32 se_callback_prog; /* request */
326 u32 se_callback_netid_len; /* request */
327 char * se_callback_netid_val; /* request */
328 u32 se_callback_addr_len; /* request */
329 char * se_callback_addr_val; /* request */
330 u32 se_callback_ident; /* request */
331 clientid_t se_clientid; /* response */
332 nfs4_verifier se_confirm; /* response */
333};
334
/*
 * Fields of a setclientid-confirm operation: a client id and a verifier,
 * the same two types that struct nfsd4_setclientid tags as its response.
 */
335struct nfsd4_setclientid_confirm {
336 clientid_t sc_clientid;
337 nfs4_verifier sc_confirm;
338};
339
/*
 * Arguments of a verify operation: a three-word attribute bitmap plus an
 * attribute value buffer (length and pointer).  struct nfsd4_op uses this
 * type for both its verify and nverify members.
 */
340/* also used for NVERIFY */
341struct nfsd4_verify {
342 u32 ve_bmval[3]; /* request */
343 u32 ve_attrlen; /* request */
344 char * ve_attrval; /* request */
345};
346
/*
 * Arguments and results of a write operation.  The request supplies a
 * stateid, a 64-bit offset, a stable_how value and a buffer length; the
 * response reports bytes written, how they were written and a verifier.
 */
347struct nfsd4_write {
348 stateid_t wr_stateid; /* request */
349 u64 wr_offset; /* request */
350 u32 wr_stable_how; /* request */
351 u32 wr_buflen; /* request */
/* NOTE(review): wr_vlen is untagged; presumably set while the op is processed -- confirm. */
352 int wr_vlen;
353
354 u32 wr_bytes_written; /* response */
355 u32 wr_how_written; /* response */
356 nfs4_verifier wr_verifier; /* response */
357};
358
/*
 * Fields of an exchange-id operation (listed under "NFSv4.1" in
 * struct nfsd4_op): a verifier, an opaque client name, flags, a client id,
 * a seqid and spa_how.  Request/response roles are not tagged here.
 */
359struct nfsd4_exchange_id {
360 nfs4_verifier verifier;
361 struct xdr_netobj clname;
362 u32 flags;
363 clientid_t clientid;
364 u32 seqid;
365 int spa_how;
366};
367
/*
 * Fields of a sequence operation (listed under "NFSv4.1" in
 * struct nfsd4_op).  The session id, seqid, slotid and maxslots are tagged
 * as both request and response; cachethis is request-only.
 */
368struct nfsd4_sequence {
369 struct nfs4_sessionid sessionid; /* request/response */
370 u32 seqid; /* request/response */
371 u32 slotid; /* request/response */
372 u32 maxslots; /* request/response */
373 u32 cachethis; /* request */
/* The two response members below are compiled out; the #endif marks them "not yet". */
374#if 0
375 u32 target_maxslots; /* response */
376 u32 status_flags; /* response */
377#endif /* not yet */
378};
379
/* Fields of a destroy-session operation: only the session id. */
380struct nfsd4_destroy_session {
381 struct nfs4_sessionid sessionid;
382};
383
/*
 * One operation: an operation number (opnum), a __be32 status, a union u
 * holding the per-operation structure, and a replay pointer.
 * NOTE(review): presumably opnum selects the live union member -- confirm
 * against the decoder and dispatcher.
 */
384struct nfsd4_op {
385 int opnum;
386 __be32 status;
387 union {
388 struct nfsd4_access access;
389 struct nfsd4_close close;
390 struct nfsd4_commit commit;
391 struct nfsd4_create create;
392 struct nfsd4_delegreturn delegreturn;
393 struct nfsd4_getattr getattr;
394 struct svc_fh * getfh;
395 struct nfsd4_link link;
396 struct nfsd4_lock lock;
397 struct nfsd4_lockt lockt;
398 struct nfsd4_locku locku;
399 struct nfsd4_lookup lookup;
/* nverify and verify (below) share struct nfsd4_verify. */
400 struct nfsd4_verify nverify;
401 struct nfsd4_open open;
402 struct nfsd4_open_confirm open_confirm;
403 struct nfsd4_open_downgrade open_downgrade;
404 struct nfsd4_putfh putfh;
405 struct nfsd4_read read;
406 struct nfsd4_readdir readdir;
407 struct nfsd4_readlink readlink;
408 struct nfsd4_remove remove;
409 struct nfsd4_rename rename;
/* renew has no wrapper struct; it is a bare clientid_t. */
410 clientid_t renew;
411 struct nfsd4_secinfo secinfo;
412 struct nfsd4_setattr setattr;
413 struct nfsd4_setclientid setclientid;
414 struct nfsd4_setclientid_confirm setclientid_confirm;
415 struct nfsd4_verify verify;
416 struct nfsd4_write write;
417 struct nfsd4_release_lockowner release_lockowner;
418
419 /* NFSv4.1 */
420 struct nfsd4_exchange_id exchange_id;
421 struct nfsd4_create_session create_session;
422 struct nfsd4_destroy_session destroy_session;
423 struct nfsd4_sequence sequence;
424 } u;
425 struct nfs4_replay * replay;
426};
427
/*
 * Compound request arguments: XDR decode scratch state, a list of
 * temporary buffers with release callbacks, the owning RPC request, the
 * tag, the minor version and the operations.  NFS4_SVC_XDRSIZE is the size
 * of this structure.
 */
428struct nfsd4_compoundargs {
429 /* scratch variables for XDR decode */
430 __be32 * p;
431 __be32 * end;
432 struct page ** pagelist;
433 int pagelen;
434 __be32 tmp[8];
435 __be32 * tmpp;
/* Singly linked list of buffers; each node carries its own release callback. */
436 struct tmpbuf {
437 struct tmpbuf *next;
438 void (*release)(const void *);
439 void *buf;
440 } *to_free;
441
442 struct svc_rqst *rqstp;
443
444 u32 taglen;
445 char * tag;
446 u32 minorversion;
447 u32 opcnt;
/* NOTE(review): presumably ops points at iops unless more than 8 ops are needed -- confirm in the decoder. */
448 struct nfsd4_op *ops;
449 struct nfsd4_op iops[8];
450};
451
/*
 * Compound response: XDR encode scratch state, the owning RPC request, the
 * tag, an operation count, the tag/opcount encode location and the
 * embedded compound state.
 */
452struct nfsd4_compoundres {
453 /* scratch variables for XDR encode */
454 __be32 * p;
455 __be32 * end;
456 struct xdr_buf * xbuf;
457 struct svc_rqst * rqstp;
458
459 u32 taglen;
460 char * tag;
461 u32 opcnt;
462 __be32 * tagp; /* tag, opcount encode location */
463 struct nfsd4_compound_state cstate;
464};
465
/*
 * Return true when the response's opcnt is exactly 1 and the first
 * operation in the request arguments is OP_SEQUENCE.  The arguments are
 * reached through resp->rqstp->rq_argp.
 */
466static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
467{
468 struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
469 return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
470}
471
/*
 * Return true when the slot in the compound state has sl_cachethis clear,
 * or when the compound is a solo sequence (see nfsd4_is_solo_sequence()).
 * Dereferences resp->cstate.slot without a NULL check.
 */
472static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
473{
474 return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
475}
476
/* Size in bytes of struct nfsd4_compoundargs. */
477#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
478
/*
 * Fill *cinfo from the pre- and post-operation values saved in *fhp.
 *
 * BUGs unless both fh_pre_saved and fh_post_saved are set.  Always sets
 * cinfo->atomic to 1.  If IS_I_VERSION() holds for the handle's inode, the
 * before/after change values are copied; otherwise the before/after ctime
 * seconds and nanoseconds are copied instead.
 */
479static inline void
480set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
481{
482 BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
483 cinfo->atomic = 1;
484 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
485 if (cinfo->change_supported) {
486 cinfo->before_change = fhp->fh_pre_change;
487 cinfo->after_change = fhp->fh_post_change;
488 } else {
/* "before" comes from fh_pre_ctime, "after" from fh_post_attr.ctime. */
489 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
490 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
491 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
492 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
493 }
494}
495
/*
 * Declarations only; the definitions are not in this header.
 *
 * The nfs4svc_* and nfsd4_encode_* routines are the XDR encode/decode
 * entry points.  The remaining nfsd4_* functions return __be32
 * (NOTE(review): presumably an NFS status in network byte order --
 * confirm), except nfsd4_store_cache_entry() and
 * nfsd4_release_compoundargs(), which return void.
 *
 * The nfsd4_exchange_id and nfsd4_create_session declarations are indented
 * inconsistently; that is whitespace only.
 */
496int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
497int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
498 struct nfsd4_compoundargs *);
499int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
500 struct nfsd4_compoundres *);
501void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
502void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
503__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
504 struct dentry *dentry, __be32 *buffer, int *countp,
505 u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
506extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
507 struct nfsd4_compound_state *,
508 struct nfsd4_setclientid *setclid);
509extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
510 struct nfsd4_compound_state *,
511 struct nfsd4_setclientid_confirm *setclientid_confirm);
512extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
513extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
514 struct nfsd4_sequence *seq);
515extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
516 struct nfsd4_compound_state *,
517struct nfsd4_exchange_id *);
 518 extern __be32 nfsd4_create_session(struct svc_rqst *,
519 struct nfsd4_compound_state *,
520 struct nfsd4_create_session *);
521extern __be32 nfsd4_sequence(struct svc_rqst *,
522 struct nfsd4_compound_state *,
523 struct nfsd4_sequence *);
524extern __be32 nfsd4_destroy_session(struct svc_rqst *,
525 struct nfsd4_compound_state *,
526 struct nfsd4_destroy_session *);
527extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
528 struct nfsd4_open *open);
529extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
530 struct svc_fh *current_fh, struct nfsd4_open *open);
531extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
532 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
533extern __be32 nfsd4_close(struct svc_rqst *rqstp,
534 struct nfsd4_compound_state *,
535 struct nfsd4_close *close);
536extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
537 struct nfsd4_compound_state *,
538 struct nfsd4_open_downgrade *od);
539extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
540 struct nfsd4_lock *lock);
541extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
542 struct nfsd4_compound_state *,
543 struct nfsd4_lockt *lockt);
544extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
545 struct nfsd4_compound_state *,
546 struct nfsd4_locku *locku);
547extern __be32
548nfsd4_release_lockowner(struct svc_rqst *rqstp,
549 struct nfsd4_compound_state *,
550 struct nfsd4_release_lockowner *rlockowner);
551extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
552extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
553 struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
554extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
555 struct nfsd4_compound_state *, clientid_t *clid);
556#endif
557
558/*
559 * Local variables:
560 * c-basic-offset: 8
561 * End:
562 */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index f4a14ea2ed9c..effdbdbe6c11 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -417,8 +417,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
417 417
418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - 418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
419 bmap->b_inode->i_blkbits); 419 bmap->b_inode->i_blkbits);
420 for (pbh = page_buffers(bh->b_page); pbh != bh; 420 for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
421 pbh = pbh->b_this_page, key++); 421 key++;
422 422
423 return key; 423 return key;
424} 424}
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index d5ad54e204a5..18737818db63 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -328,19 +328,24 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 nilfs_mdt_mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno) && 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 (count = nilfs_cpfile_block_sub_valid_checkpoints( 332 count =
333 cpfile, cp_bh, kaddr, nicps)) == 0) { 333 nilfs_cpfile_block_sub_valid_checkpoints(
334 /* make hole */ 334 cpfile, cp_bh, kaddr, nicps);
335 kunmap_atomic(kaddr, KM_USER0); 335 if (count == 0) {
336 brelse(cp_bh); 336 /* make hole */
337 ret = nilfs_cpfile_delete_checkpoint_block( 337 kunmap_atomic(kaddr, KM_USER0);
338 cpfile, cno); 338 brelse(cp_bh);
339 if (ret == 0) 339 ret =
340 continue; 340 nilfs_cpfile_delete_checkpoint_block(
341 printk(KERN_ERR "%s: cannot delete block\n", 341 cpfile, cno);
342 __func__); 342 if (ret == 0)
343 break; 343 continue;
344 printk(KERN_ERR
345 "%s: cannot delete block\n",
346 __func__);
347 break;
348 }
344 } 349 }
345 } 350 }
346 351
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index d369ac718277..236753df5cdf 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -51,11 +51,11 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
51 struct nilfs_direct *direct; 51 struct nilfs_direct *direct;
52 __u64 ptr; 52 __u64 ptr;
53 53
54 direct = (struct nilfs_direct *)bmap; 54 direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */
55 if ((key > NILFS_DIRECT_KEY_MAX) || 55 if (key > NILFS_DIRECT_KEY_MAX || level != 1)
56 (level != 1) || /* XXX: use macro for level 1 */ 56 return -ENOENT;
57 ((ptr = nilfs_direct_get_ptr(direct, key)) == 57 ptr = nilfs_direct_get_ptr(direct, key);
58 NILFS_BMAP_INVALID_PTR)) 58 if (ptr == NILFS_BMAP_INVALID_PTR)
59 return -ENOENT; 59 return -ENOENT;
60 60
61 if (ptrp != NULL) 61 if (ptrp != NULL)
@@ -73,9 +73,10 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
73 sector_t blocknr; 73 sector_t blocknr;
74 int ret, cnt; 74 int ret, cnt;
75 75
76 if (key > NILFS_DIRECT_KEY_MAX || 76 if (key > NILFS_DIRECT_KEY_MAX)
77 (ptr = nilfs_direct_get_ptr(direct, key)) == 77 return -ENOENT;
78 NILFS_BMAP_INVALID_PTR) 78 ptr = nilfs_direct_get_ptr(direct, key);
79 if (ptr == NILFS_BMAP_INVALID_PTR)
79 return -ENOENT; 80 return -ENOENT;
80 81
81 if (NILFS_BMAP_USE_VBN(bmap)) { 82 if (NILFS_BMAP_USE_VBN(bmap)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f6af76042d80..d6b2b83de363 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -480,7 +480,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
480 unsigned int cmd, void __user *argp) 480 unsigned int cmd, void __user *argp)
481{ 481{
482 struct nilfs_argv argv[5]; 482 struct nilfs_argv argv[5];
483 const static size_t argsz[5] = { 483 static const size_t argsz[5] = {
484 sizeof(struct nilfs_vdesc), 484 sizeof(struct nilfs_vdesc),
485 sizeof(struct nilfs_period), 485 sizeof(struct nilfs_period),
486 sizeof(__u64), 486 sizeof(__u64),
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5403b3ef3a42..8173faee31e6 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1118,8 +1118,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1118 /* Abandoning the newly allocated superblock */ 1118 /* Abandoning the newly allocated superblock */
1119 mutex_unlock(&nilfs->ns_mount_mutex); 1119 mutex_unlock(&nilfs->ns_mount_mutex);
1120 put_nilfs(nilfs); 1120 put_nilfs(nilfs);
1121 up_write(&s->s_umount); 1121 deactivate_locked_super(s);
1122 deactivate_super(s);
1123 /* 1122 /*
1124 * deactivate_super() invokes close_bdev_exclusive(). 1123 * deactivate_super() invokes close_bdev_exclusive().
1125 * We must finish all post-cleaning before this call; 1124 * We must finish all post-cleaning before this call;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e1..1afb0a10229f 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
121 if (warned) 121 if (warned)
122 return 0; 122 return 0;
123 123
124 warned = false; 124 warned = true;
125 entry = p; 125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127 127
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 5ef5f365a5c8..a94e8bd8eb1f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -552,7 +552,7 @@ retry:
552 552
553 spin_lock(&group->inotify_data.idr_lock); 553 spin_lock(&group->inotify_data.idr_lock);
554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
555 group->inotify_data.last_wd, 555 group->inotify_data.last_wd+1,
556 &tmp_ientry->wd); 556 &tmp_ientry->wd);
557 spin_unlock(&group->inotify_data.idr_lock); 557 spin_unlock(&group->inotify_data.idr_lock);
558 if (ret) { 558 if (ret) {
@@ -632,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
632 632
633 spin_lock_init(&group->inotify_data.idr_lock); 633 spin_lock_init(&group->inotify_data.idr_lock);
634 idr_init(&group->inotify_data.idr); 634 idr_init(&group->inotify_data.idr);
635 group->inotify_data.last_wd = 1; 635 group->inotify_data.last_wd = 0;
636 group->inotify_data.user = user; 636 group->inotify_data.user = user;
637 group->inotify_data.fa = NULL; 637 group->inotify_data.fa = NULL;
638 638
@@ -646,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
646 struct fsnotify_group *group; 646 struct fsnotify_group *group;
647 struct user_struct *user; 647 struct user_struct *user;
648 struct file *filp; 648 struct file *filp;
649 struct path path;
649 int fd, ret; 650 int fd, ret;
650 651
651 /* Check the IN_* constants for consistency. */ 652 /* Check the IN_* constants for consistency. */
@@ -659,12 +660,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
659 if (fd < 0) 660 if (fd < 0)
660 return fd; 661 return fd;
661 662
662 filp = get_empty_filp();
663 if (!filp) {
664 ret = -ENFILE;
665 goto out_put_fd;
666 }
667
668 user = get_current_user(); 663 user = get_current_user();
669 if (unlikely(atomic_read(&user->inotify_devs) >= 664 if (unlikely(atomic_read(&user->inotify_devs) >=
670 inotify_max_user_instances)) { 665 inotify_max_user_instances)) {
@@ -679,24 +674,28 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
679 goto out_free_uid; 674 goto out_free_uid;
680 } 675 }
681 676
682 filp->f_op = &inotify_fops; 677 atomic_inc(&user->inotify_devs);
683 filp->f_path.mnt = mntget(inotify_mnt); 678
684 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 679 path.mnt = inotify_mnt;
685 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 680 path.dentry = inotify_mnt->mnt_root;
686 filp->f_mode = FMODE_READ; 681 path_get(&path);
682 filp = alloc_file(&path, FMODE_READ, &inotify_fops);
683 if (!filp)
684 goto Enfile;
685
687 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); 686 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
688 filp->private_data = group; 687 filp->private_data = group;
689 688
690 atomic_inc(&user->inotify_devs);
691
692 fd_install(fd, filp); 689 fd_install(fd, filp);
693 690
694 return fd; 691 return fd;
695 692
693Enfile:
694 ret = -ENFILE;
695 path_put(&path);
696 atomic_dec(&user->inotify_devs);
696out_free_uid: 697out_free_uid:
697 free_uid(user); 698 free_uid(user);
698 put_filp(filp);
699out_put_fd:
700 put_unused_fd(fd); 699 put_unused_fd(fd);
701 return ret; 700 return ret;
702} 701}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762cc..dc2505abb6d7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
530 * the ntfs inode. 530 * the ntfs inode.
531 * 531 *
532 * Q: What locks are held when the function is called? 532 * Q: What locks are held when the function is called?
533 * A: i_state has I_LOCK set, hence the inode is locked, also 533 * A: i_state has I_NEW set, hence the inode is locked, also
534 * i_count is set to 1, so it is not going to go away 534 * i_count is set to 1, so it is not going to go away
535 * i_flags is set to 0 and we have no business touching it. Only an ioctl() 535 * i_flags is set to 0 and we have no business touching it. Only an ioctl()
536 * is allowed to write to them. We should of course be honouring them but 536 * is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
1207 * necessary fields in @vi as well as initializing the ntfs inode. 1207 * necessary fields in @vi as well as initializing the ntfs inode.
1208 * 1208 *
1209 * Q: What locks are held when the function is called? 1209 * Q: What locks are held when the function is called?
1210 * A: i_state has I_LOCK set, hence the inode is locked, also 1210 * A: i_state has I_NEW set, hence the inode is locked, also
1211 * i_count is set to 1, so it is not going to go away 1211 * i_count is set to 1, so it is not going to go away
1212 * 1212 *
1213 * Return 0 on success and -errno on error. In the error case, the inode will 1213 * Return 0 on success and -errno on error. In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
1474 * normal directory inodes. 1474 * normal directory inodes.
1475 * 1475 *
1476 * Q: What locks are held when the function is called? 1476 * Q: What locks are held when the function is called?
1477 * A: i_state has I_LOCK set, hence the inode is locked, also 1477 * A: i_state has I_NEW set, hence the inode is locked, also
1478 * i_count is set to 1, so it is not going to go away 1478 * i_count is set to 1, so it is not going to go away
1479 * 1479 *
1480 * Return 0 on success and -errno on error. In the error case, the inode will 1480 * Return 0 on success and -errno on error. In the error case, the inode will
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 701b7a3a872e..0d840669698e 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -6,6 +6,7 @@ config OCFS2_FS
6 select CRC32 6 select CRC32
7 select QUOTA 7 select QUOTA
8 select QUOTA_TREE 8 select QUOTA_TREE
9 select FS_POSIX_ACL
9 help 10 help
10 OCFS2 is a general purpose extent based shared disk cluster file 11 OCFS2 is a general purpose extent based shared disk cluster file
11 system with many similarities to ext3. It supports 64 bit inode 12 system with many similarities to ext3. It supports 64 bit inode
@@ -74,12 +75,3 @@ config OCFS2_DEBUG_FS
74 This option will enable expensive consistency checks. Enable 75 This option will enable expensive consistency checks. Enable
75 this option for debugging only as it is likely to decrease 76 this option for debugging only as it is likely to decrease
76 performance of the filesystem. 77 performance of the filesystem.
77
78config OCFS2_FS_POSIX_ACL
79 bool "OCFS2 POSIX Access Control Lists"
80 depends on OCFS2_FS
81 select FS_POSIX_ACL
82 default n
83 help
84 Posix Access Control Lists (ACLs) support permissions for users and
85 groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 31f25ce32c97..600d2d2ade11 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -39,11 +39,8 @@ ocfs2-objs := \
39 ver.o \ 39 ver.o \
40 quota_local.o \ 40 quota_local.o \
41 quota_global.o \ 41 quota_global.o \
42 xattr.o 42 xattr.o \
43 43 acl.o
44ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
45ocfs2-objs += acl.o
46endif
47 44
48ocfs2_stackglue-objs := stackglue.o 45ocfs2_stackglue-objs := stackglue.o
49ocfs2_stack_o2cb-objs := stack_o2cb.o 46ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index fbeaec762103..0501974bedd0 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -98,15 +98,11 @@ static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
98 int type, 98 int type,
99 struct buffer_head *di_bh) 99 struct buffer_head *di_bh)
100{ 100{
101 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
102 int name_index; 101 int name_index;
103 char *value = NULL; 102 char *value = NULL;
104 struct posix_acl *acl; 103 struct posix_acl *acl;
105 int retval; 104 int retval;
106 105
107 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
108 return NULL;
109
110 switch (type) { 106 switch (type) {
111 case ACL_TYPE_ACCESS: 107 case ACL_TYPE_ACCESS:
112 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; 108 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
@@ -331,13 +327,14 @@ cleanup:
331 return ret; 327 return ret;
332} 328}
333 329
334static size_t ocfs2_xattr_list_acl_access(struct inode *inode, 330static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
335 char *list, 331 char *list,
336 size_t list_len, 332 size_t list_len,
337 const char *name, 333 const char *name,
338 size_t name_len) 334 size_t name_len,
335 int type)
339{ 336{
340 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 337 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
341 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 338 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
342 339
343 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 340 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -348,13 +345,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
348 return size; 345 return size;
349} 346}
350 347
351static size_t ocfs2_xattr_list_acl_default(struct inode *inode, 348static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
352 char *list, 349 char *list,
353 size_t list_len, 350 size_t list_len,
354 const char *name, 351 const char *name,
355 size_t name_len) 352 size_t name_len,
353 int type)
356{ 354{
357 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 355 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 356 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359 357
360 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 358 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -365,19 +363,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
365 return size; 363 return size;
366} 364}
367 365
368static int ocfs2_xattr_get_acl(struct inode *inode, 366static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
369 int type, 367 void *buffer, size_t size, int type)
370 void *buffer,
371 size_t size)
372{ 368{
373 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 369 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
374 struct posix_acl *acl; 370 struct posix_acl *acl;
375 int ret; 371 int ret;
376 372
373 if (strcmp(name, "") != 0)
374 return -EINVAL;
377 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 375 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
378 return -EOPNOTSUPP; 376 return -EOPNOTSUPP;
379 377
380 acl = ocfs2_get_acl(inode, type); 378 acl = ocfs2_get_acl(dentry->d_inode, type);
381 if (IS_ERR(acl)) 379 if (IS_ERR(acl))
382 return PTR_ERR(acl); 380 return PTR_ERR(acl);
383 if (acl == NULL) 381 if (acl == NULL)
@@ -388,35 +386,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode,
388 return ret; 386 return ret;
389} 387}
390 388
391static int ocfs2_xattr_get_acl_access(struct inode *inode, 389static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
392 const char *name, 390 const void *value, size_t size, int flags, int type)
393 void *buffer,
394 size_t size)
395{
396 if (strcmp(name, "") != 0)
397 return -EINVAL;
398 return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
399}
400
401static int ocfs2_xattr_get_acl_default(struct inode *inode,
402 const char *name,
403 void *buffer,
404 size_t size)
405{
406 if (strcmp(name, "") != 0)
407 return -EINVAL;
408 return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
409}
410
411static int ocfs2_xattr_set_acl(struct inode *inode,
412 int type,
413 const void *value,
414 size_t size)
415{ 391{
392 struct inode *inode = dentry->d_inode;
416 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 393 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
417 struct posix_acl *acl; 394 struct posix_acl *acl;
418 int ret = 0; 395 int ret = 0;
419 396
397 if (strcmp(name, "") != 0)
398 return -EINVAL;
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 399 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
421 return -EOPNOTSUPP; 400 return -EOPNOTSUPP;
422 401
@@ -442,38 +421,18 @@ cleanup:
442 return ret; 421 return ret;
443} 422}
444 423
445static int ocfs2_xattr_set_acl_access(struct inode *inode,
446 const char *name,
447 const void *value,
448 size_t size,
449 int flags)
450{
451 if (strcmp(name, "") != 0)
452 return -EINVAL;
453 return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
454}
455
456static int ocfs2_xattr_set_acl_default(struct inode *inode,
457 const char *name,
458 const void *value,
459 size_t size,
460 int flags)
461{
462 if (strcmp(name, "") != 0)
463 return -EINVAL;
464 return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
465}
466
467struct xattr_handler ocfs2_xattr_acl_access_handler = { 424struct xattr_handler ocfs2_xattr_acl_access_handler = {
468 .prefix = POSIX_ACL_XATTR_ACCESS, 425 .prefix = POSIX_ACL_XATTR_ACCESS,
426 .flags = ACL_TYPE_ACCESS,
469 .list = ocfs2_xattr_list_acl_access, 427 .list = ocfs2_xattr_list_acl_access,
470 .get = ocfs2_xattr_get_acl_access, 428 .get = ocfs2_xattr_get_acl,
471 .set = ocfs2_xattr_set_acl_access, 429 .set = ocfs2_xattr_set_acl,
472}; 430};
473 431
474struct xattr_handler ocfs2_xattr_acl_default_handler = { 432struct xattr_handler ocfs2_xattr_acl_default_handler = {
475 .prefix = POSIX_ACL_XATTR_DEFAULT, 433 .prefix = POSIX_ACL_XATTR_DEFAULT,
434 .flags = ACL_TYPE_DEFAULT,
476 .list = ocfs2_xattr_list_acl_default, 435 .list = ocfs2_xattr_list_acl_default,
477 .get = ocfs2_xattr_get_acl_default, 436 .get = ocfs2_xattr_get_acl,
478 .set = ocfs2_xattr_set_acl_default, 437 .set = ocfs2_xattr_set_acl,
479}; 438};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 8f6389ed4da5..5c5d31f05853 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,8 +26,6 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29#ifdef CONFIG_OCFS2_FS_POSIX_ACL
30
31extern int ocfs2_check_acl(struct inode *, int); 29extern int ocfs2_check_acl(struct inode *, int);
32extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
33extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
@@ -35,24 +33,4 @@ extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
35 struct ocfs2_alloc_context *, 33 struct ocfs2_alloc_context *,
36 struct ocfs2_alloc_context *); 34 struct ocfs2_alloc_context *);
37 35
38#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
39
40#define ocfs2_check_acl NULL
41static inline int ocfs2_acl_chmod(struct inode *inode)
42{
43 return 0;
44}
45static inline int ocfs2_init_acl(handle_t *handle,
46 struct inode *inode,
47 struct inode *dir,
48 struct buffer_head *di_bh,
49 struct buffer_head *dir_bh,
50 struct ocfs2_alloc_context *meta_ac,
51 struct ocfs2_alloc_context *data_ac)
52{
53 return 0;
54}
55
56#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
57
58#endif /* OCFS2_ACL_H */ 36#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7c7198a5bc90..d17bdc718f74 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1765,9 +1765,9 @@ set_and_inc:
1765 * 1765 *
1766 * The array index of the subtree root is passed back. 1766 * The array index of the subtree root is passed back.
1767 */ 1767 */
1768static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, 1768int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
1769 struct ocfs2_path *left, 1769 struct ocfs2_path *left,
1770 struct ocfs2_path *right) 1770 struct ocfs2_path *right)
1771{ 1771{
1772 int i = 0; 1772 int i = 0;
1773 1773
@@ -2872,8 +2872,8 @@ out:
2872 * This looks similar, but is subtly different to 2872 * This looks similar, but is subtly different to
2873 * ocfs2_find_cpos_for_left_leaf(). 2873 * ocfs2_find_cpos_for_left_leaf().
2874 */ 2874 */
2875static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 2875int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
2876 struct ocfs2_path *path, u32 *cpos) 2876 struct ocfs2_path *path, u32 *cpos)
2877{ 2877{
2878 int i, j, ret = 0; 2878 int i, j, ret = 0;
2879 u64 blkno; 2879 u64 blkno;
@@ -7190,8 +7190,8 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
7190 * wait on them - the truncate_inode_pages() call later will 7190 * wait on them - the truncate_inode_pages() call later will
7191 * do that for us. 7191 * do that for us.
7192 */ 7192 */
7193 ret = do_sync_mapping_range(inode->i_mapping, range_start, 7193 ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
7194 range_end - 1, SYNC_FILE_RANGE_WRITE); 7194 range_end - 1);
7195 if (ret) 7195 if (ret)
7196 mlog_errno(ret); 7196 mlog_errno(ret);
7197 7197
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 9c122d574464..1db4359ccb90 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle,
317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, 317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
318 handle_t *handle, 318 handle_t *handle,
319 struct ocfs2_path *path); 319 struct ocfs2_path *path);
320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
321 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
323 struct ocfs2_path *left,
324 struct ocfs2_path *right);
320#endif /* OCFS2_ALLOC_H */ 325#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index deb2b132ae5e..3dae4a13f6e4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -547,6 +547,9 @@ bail:
547 * 547 *
548 * called like this: dio->get_blocks(dio->inode, fs_startblk, 548 * called like this: dio->get_blocks(dio->inode, fs_startblk,
549 * fs_count, map_bh, dio->rw == WRITE); 549 * fs_count, map_bh, dio->rw == WRITE);
550 *
551 * Note that we never bother to allocate blocks here, and thus ignore the
552 * create argument.
550 */ 553 */
551static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, 554static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
552 struct buffer_head *bh_result, int create) 555 struct buffer_head *bh_result, int create)
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
563 566
564 inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 567 inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
565 568
566 /*
567 * Any write past EOF is not allowed because we'd be extending.
568 */
569 if (create && (iblock + max_blocks) > inode_blocks) {
570 ret = -EIO;
571 goto bail;
572 }
573
574 /* This figures out the size of the next contiguous block, and 569 /* This figures out the size of the next contiguous block, and
575 * our logical offset */ 570 * our logical offset */
576 ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, 571 ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
@@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
582 goto bail; 577 goto bail;
583 } 578 }
584 579
585 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
586 ocfs2_error(inode->i_sb,
587 "Inode %llu has a hole at block %llu\n",
588 (unsigned long long)OCFS2_I(inode)->ip_blkno,
589 (unsigned long long)iblock);
590 ret = -EROFS;
591 goto bail;
592 }
593
594 /* We should already CoW the refcounted extent. */ 580 /* We should already CoW the refcounted extent. */
595 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); 581 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
596 /* 582 /*
@@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
601 */ 587 */
602 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) 588 if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
603 map_bh(bh_result, inode->i_sb, p_blkno); 589 map_bh(bh_result, inode->i_sb, p_blkno);
604 else { 590 else
605 /*
606 * ocfs2_prepare_inode_for_write() should have caught
607 * the case where we'd be filling a hole and triggered
608 * a buffered write instead.
609 */
610 if (create) {
611 ret = -EIO;
612 mlog_errno(ret);
613 goto bail;
614 }
615
616 clear_buffer_mapped(bh_result); 591 clear_buffer_mapped(bh_result);
617 }
618 592
619 /* make sure we don't map more than max_blocks blocks here as 593 /* make sure we don't map more than max_blocks blocks here as
620 that's all the kernel will handle at this point. */ 594 that's all the kernel will handle at this point. */
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index c452d116b892..eda5b8bcddd5 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -176,7 +176,8 @@ static void o2hb_write_timeout(struct work_struct *work)
176 176
177static void o2hb_arm_write_timeout(struct o2hb_region *reg) 177static void o2hb_arm_write_timeout(struct o2hb_region *reg)
178{ 178{
179 mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS); 179 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
180 O2HB_MAX_WRITE_TIMEOUT_MS);
180 181
181 cancel_delayed_work(&reg->hr_write_timeout_work); 182 cancel_delayed_work(&reg->hr_write_timeout_work);
182 reg->hr_last_timeout_start = jiffies; 183 reg->hr_last_timeout_start = jiffies;
@@ -874,7 +875,8 @@ static int o2hb_thread(void *data)
874 do_gettimeofday(&after_hb); 875 do_gettimeofday(&after_hb);
875 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 876 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
876 877
877 mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n", 878 mlog(ML_HEARTBEAT,
879 "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
878 before_hb.tv_sec, (unsigned long) before_hb.tv_usec, 880 before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
879 after_hb.tv_sec, (unsigned long) after_hb.tv_usec, 881 after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
880 elapsed_msec); 882 elapsed_msec);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 7ee6188bc79a..c81142e3ef84 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,6 +35,10 @@
35 * cluster references throughout where nodes are looked up */ 35 * cluster references throughout where nodes are looked up */
36struct o2nm_cluster *o2nm_single_cluster = NULL; 36struct o2nm_cluster *o2nm_single_cluster = NULL;
37 37
38char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
39 "reset", /* O2NM_FENCE_RESET */
40 "panic", /* O2NM_FENCE_PANIC */
41};
38 42
39struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 43struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
40{ 44{
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
579 return o2nm_cluster_attr_write(page, count, 583 return o2nm_cluster_attr_write(page, count,
580 &cluster->cl_reconnect_delay_ms); 584 &cluster->cl_reconnect_delay_ms);
581} 585}
586
587static ssize_t o2nm_cluster_attr_fence_method_read(
588 struct o2nm_cluster *cluster, char *page)
589{
590 ssize_t ret = 0;
591
592 if (cluster)
593 ret = sprintf(page, "%s\n",
594 o2nm_fence_method_desc[cluster->cl_fence_method]);
595 return ret;
596}
597
598static ssize_t o2nm_cluster_attr_fence_method_write(
599 struct o2nm_cluster *cluster, const char *page, size_t count)
600{
601 unsigned int i;
602
603 if (page[count - 1] != '\n')
604 goto bail;
605
606 for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
607 if (count != strlen(o2nm_fence_method_desc[i]) + 1)
608 continue;
609 if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
610 continue;
611 if (cluster->cl_fence_method != i) {
612 printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
613 o2nm_fence_method_desc[i]);
614 cluster->cl_fence_method = i;
615 }
616 return count;
617 }
618
619bail:
620 return -EINVAL;
621}
622
582static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { 623static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
583 .attr = { .ca_owner = THIS_MODULE, 624 .attr = { .ca_owner = THIS_MODULE,
584 .ca_name = "idle_timeout_ms", 625 .ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
603 .store = o2nm_cluster_attr_reconnect_delay_ms_write, 644 .store = o2nm_cluster_attr_reconnect_delay_ms_write,
604}; 645};
605 646
647static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
648 .attr = { .ca_owner = THIS_MODULE,
649 .ca_name = "fence_method",
650 .ca_mode = S_IRUGO | S_IWUSR },
651 .show = o2nm_cluster_attr_fence_method_read,
652 .store = o2nm_cluster_attr_fence_method_write,
653};
654
606static struct configfs_attribute *o2nm_cluster_attrs[] = { 655static struct configfs_attribute *o2nm_cluster_attrs[] = {
607 &o2nm_cluster_attr_idle_timeout_ms.attr, 656 &o2nm_cluster_attr_idle_timeout_ms.attr,
608 &o2nm_cluster_attr_keepalive_delay_ms.attr, 657 &o2nm_cluster_attr_keepalive_delay_ms.attr,
609 &o2nm_cluster_attr_reconnect_delay_ms.attr, 658 &o2nm_cluster_attr_reconnect_delay_ms.attr,
659 &o2nm_cluster_attr_fence_method.attr,
610 NULL, 660 NULL,
611}; 661};
612static ssize_t o2nm_cluster_show(struct config_item *item, 662static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
778 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT; 828 cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
779 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 829 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
780 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 830 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
831 cluster->cl_fence_method = O2NM_FENCE_RESET;
781 832
782 ret = &cluster->cl_group; 833 ret = &cluster->cl_group;
783 o2nm_single_cluster = cluster; 834 o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index c992ea0da4ad..09ea2d388bbb 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,6 +33,12 @@
33#include <linux/configfs.h> 33#include <linux/configfs.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35 35
36enum o2nm_fence_method {
37 O2NM_FENCE_RESET = 0,
38 O2NM_FENCE_PANIC,
39 O2NM_FENCE_METHODS, /* Number of fence methods */
40};
41
36struct o2nm_node { 42struct o2nm_node {
37 spinlock_t nd_lock; 43 spinlock_t nd_lock;
38 struct config_item nd_item; 44 struct config_item nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
58 unsigned int cl_idle_timeout_ms; 64 unsigned int cl_idle_timeout_ms;
59 unsigned int cl_keepalive_delay_ms; 65 unsigned int cl_keepalive_delay_ms;
60 unsigned int cl_reconnect_delay_ms; 66 unsigned int cl_reconnect_delay_ms;
67 enum o2nm_fence_method cl_fence_method;
61 68
62 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ 69 /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
63 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 70 unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index bbacf7da48a4..639024033fce 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
74 * threads can still schedule, etc, etc */ 74 * threads can still schedule, etc, etc */
75 o2hb_stop_all_regions(); 75 o2hb_stop_all_regions();
76 76
77 printk("ocfs2 is very sorry to be fencing this system by restarting\n"); 77 switch (o2nm_single_cluster->cl_fence_method) {
78 emergency_restart(); 78 case O2NM_FENCE_PANIC:
79 panic("*** ocfs2 is very sorry to be fencing this system by "
80 "panicing ***\n");
81 break;
82 default:
83 WARN_ON(o2nm_single_cluster->cl_fence_method >=
84 O2NM_FENCE_METHODS);
85 case O2NM_FENCE_RESET:
86 printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
87 "system by restarting ***\n");
88 emergency_restart();
89 break;
90 };
79} 91}
80 92
81/* Indicate that a timeout occured on a hearbeat region write. The 93/* Indicate that a timeout occured on a hearbeat region write. The
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index d9fa3d22e17c..2f9e4e19a4f2 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2589,6 +2589,14 @@ retry:
2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2589 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2590 ret = 0; 2590 ret = 0;
2591 } 2591 }
2592 if (ret == -EAGAIN) {
2593 mlog(0, "%s: trying to start recovery of node "
2594 "%u, but node %u is waiting for last recovery "
2595 "to complete, backoff for a bit\n", dlm->name,
2596 dead_node, nodenum);
2597 msleep(100);
2598 goto retry;
2599 }
2592 if (ret < 0) { 2600 if (ret < 0) {
2593 struct dlm_lock_resource *res; 2601 struct dlm_lock_resource *res;
2594 /* this is now a serious problem, possibly ENOMEM 2602 /* this is now a serious problem, possibly ENOMEM
@@ -2608,14 +2616,6 @@ retry:
2608 * another ENOMEM */ 2616 * another ENOMEM */
2609 msleep(100); 2617 msleep(100);
2610 goto retry; 2618 goto retry;
2611 } else if (ret == EAGAIN) {
2612 mlog(0, "%s: trying to start recovery of node "
2613 "%u, but node %u is waiting for last recovery "
2614 "to complete, backoff for a bit\n", dlm->name,
2615 dead_node, nodenum);
2616 /* TODO Look into replacing msleep with cond_resched() */
2617 msleep(100);
2618 goto retry;
2619 } 2619 }
2620 } 2620 }
2621 2621
@@ -2639,7 +2639,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
2639 dlm->name, br->node_idx, br->dead_node, 2639 dlm->name, br->node_idx, br->dead_node,
2640 dlm->reco.dead_node, dlm->reco.new_master); 2640 dlm->reco.dead_node, dlm->reco.new_master);
2641 spin_unlock(&dlm->spinlock); 2641 spin_unlock(&dlm->spinlock);
2642 return EAGAIN; 2642 return -EAGAIN;
2643 } 2643 }
2644 spin_unlock(&dlm->spinlock); 2644 spin_unlock(&dlm->spinlock);
2645 2645
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 843db64e9d4a..d35a27f4523e 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -37,6 +37,7 @@
37#include "extent_map.h" 37#include "extent_map.h"
38#include "inode.h" 38#include "inode.h"
39#include "super.h" 39#include "super.h"
40#include "symlink.h"
40 41
41#include "buffer_head_io.h" 42#include "buffer_head_io.h"
42 43
@@ -703,6 +704,12 @@ out:
703 return ret; 704 return ret;
704} 705}
705 706
707/*
708 * The ocfs2_fiemap_inline() may be a little bit misleading, since
709 * it not only handles the fiemap for inlined files, but also deals
710 * with the fast symlink, cause they have no difference for extent
711 * mapping per se.
712 */
706static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 713static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
707 struct fiemap_extent_info *fieinfo, 714 struct fiemap_extent_info *fieinfo,
708 u64 map_start) 715 u64 map_start)
@@ -715,11 +722,18 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
715 struct ocfs2_inode_info *oi = OCFS2_I(inode); 722 struct ocfs2_inode_info *oi = OCFS2_I(inode);
716 723
717 di = (struct ocfs2_dinode *)di_bh->b_data; 724 di = (struct ocfs2_dinode *)di_bh->b_data;
718 id_count = le16_to_cpu(di->id2.i_data.id_count); 725 if (ocfs2_inode_is_fast_symlink(inode))
726 id_count = ocfs2_fast_symlink_chars(inode->i_sb);
727 else
728 id_count = le16_to_cpu(di->id2.i_data.id_count);
719 729
720 if (map_start < id_count) { 730 if (map_start < id_count) {
721 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; 731 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
722 phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); 732 if (ocfs2_inode_is_fast_symlink(inode))
733 phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
734 else
735 phys += offsetof(struct ocfs2_dinode,
736 id2.i_data.id_data);
723 737
724 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 738 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
725 flags); 739 flags);
@@ -756,9 +770,10 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
756 down_read(&OCFS2_I(inode)->ip_alloc_sem); 770 down_read(&OCFS2_I(inode)->ip_alloc_sem);
757 771
758 /* 772 /*
759 * Handle inline-data separately. 773 * Handle inline-data and fast symlink separately.
760 */ 774 */
761 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 775 if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
776 ocfs2_inode_is_fast_symlink(inode)) {
762 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); 777 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
763 goto out_unlock; 778 goto out_unlock;
764 } 779 }
@@ -786,6 +801,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
786 fe_flags = 0; 801 fe_flags = 0;
787 if (rec.e_flags & OCFS2_EXT_UNWRITTEN) 802 if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
788 fe_flags |= FIEMAP_EXTENT_UNWRITTEN; 803 fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
804 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
805 fe_flags |= FIEMAP_EXTENT_SHARED;
789 if (is_last) 806 if (is_last)
790 fe_flags |= FIEMAP_EXTENT_LAST; 807 fe_flags |= FIEMAP_EXTENT_LAST;
791 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 808 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3d30a1c974a8..06ccf6a86d35 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1772,7 +1772,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1772 loff_t *ppos, 1772 loff_t *ppos,
1773 size_t count, 1773 size_t count,
1774 int appending, 1774 int appending,
1775 int *direct_io) 1775 int *direct_io,
1776 int *has_refcount)
1776{ 1777{
1777 int ret = 0, meta_level = 0; 1778 int ret = 0, meta_level = 0;
1778 struct inode *inode = dentry->d_inode; 1779 struct inode *inode = dentry->d_inode;
@@ -1833,6 +1834,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1833 saved_pos, 1834 saved_pos,
1834 count, 1835 count,
1835 &meta_level); 1836 &meta_level);
1837 if (has_refcount)
1838 *has_refcount = 1;
1836 } 1839 }
1837 1840
1838 if (ret < 0) { 1841 if (ret < 0) {
@@ -1856,6 +1859,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1856 break; 1859 break;
1857 } 1860 }
1858 1861
1862 if (has_refcount && *has_refcount == 1) {
1863 *direct_io = 0;
1864 break;
1865 }
1859 /* 1866 /*
1860 * Allowing concurrent direct writes means 1867 * Allowing concurrent direct writes means
1861 * i_size changes wouldn't be synchronized, so 1868 * i_size changes wouldn't be synchronized, so
@@ -1899,7 +1906,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1899 loff_t pos) 1906 loff_t pos)
1900{ 1907{
1901 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 1908 int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
1902 int can_do_direct; 1909 int can_do_direct, has_refcount = 0;
1903 ssize_t written = 0; 1910 ssize_t written = 0;
1904 size_t ocount; /* original count */ 1911 size_t ocount; /* original count */
1905 size_t count; /* after file limit checks */ 1912 size_t count; /* after file limit checks */
@@ -1942,7 +1949,7 @@ relock:
1942 can_do_direct = direct_io; 1949 can_do_direct = direct_io;
1943 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, 1950 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
1944 iocb->ki_left, appending, 1951 iocb->ki_left, appending,
1945 &can_do_direct); 1952 &can_do_direct, &has_refcount);
1946 if (ret < 0) { 1953 if (ret < 0) {
1947 mlog_errno(ret); 1954 mlog_errno(ret);
1948 goto out; 1955 goto out;
@@ -2006,14 +2013,16 @@ out_dio:
2006 /* buffered aio wouldn't have proper lock coverage today */ 2013 /* buffered aio wouldn't have proper lock coverage today */
2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2014 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2008 2015
2009 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) { 2016 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) ||
2017 (file->f_flags & O_DIRECT && has_refcount)) {
2010 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2018 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2011 pos + count - 1); 2019 pos + count - 1);
2012 if (ret < 0) 2020 if (ret < 0)
2013 written = ret; 2021 written = ret;
2014 2022
2015 if (!ret && (old_size != i_size_read(inode) || 2023 if (!ret && (old_size != i_size_read(inode) ||
2016 old_clusters != OCFS2_I(inode)->ip_clusters)) { 2024 old_clusters != OCFS2_I(inode)->ip_clusters ||
2025 has_refcount)) {
2017 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2026 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2018 if (ret < 0) 2027 if (ret < 0)
2019 written = ret; 2028 written = ret;
@@ -2062,7 +2071,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
2062 int ret; 2071 int ret;
2063 2072
2064 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, 2073 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
2065 sd->total_len, 0, NULL); 2074 sd->total_len, 0, NULL, NULL);
2066 if (ret < 0) { 2075 if (ret < 0) {
2067 mlog_errno(ret); 2076 mlog_errno(ret);
2068 return ret; 2077 return ret;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f010b22b1c44..50fb26a6a5f5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2108,6 +2108,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2108 } 2108 }
2109 did_quota_inode = 1; 2109 did_quota_inode = 1;
2110 2110
2111 inode->i_nlink = 0;
2111 /* do the real work now. */ 2112 /* do the real work now. */
2112 status = ocfs2_mknod_locked(osb, dir, inode, 2113 status = ocfs2_mknod_locked(osb, dir, inode,
2113 0, &new_di_bh, parent_di_bh, handle, 2114 0, &new_di_bh, parent_di_bh, handle,
@@ -2136,6 +2137,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2136 if (status < 0) 2137 if (status < 0)
2137 mlog_errno(status); 2138 mlog_errno(status);
2138 2139
2140 insert_inode_hash(inode);
2139leave: 2141leave:
2140 if (status < 0 && did_quota_inode) 2142 if (status < 0 && did_quota_inode)
2141 vfs_dq_free_inode(inode); 2143 vfs_dq_free_inode(inode);
@@ -2267,6 +2269,8 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2267 di = (struct ocfs2_dinode *)di_bh->b_data; 2269 di = (struct ocfs2_dinode *)di_bh->b_data;
2268 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); 2270 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2269 di->i_orphaned_slot = 0; 2271 di->i_orphaned_slot = 0;
2272 inode->i_nlink = 1;
2273 ocfs2_set_links_count(di, inode->i_nlink);
2270 ocfs2_journal_dirty(handle, di_bh); 2274 ocfs2_journal_dirty(handle, di_bh);
2271 2275
2272 status = ocfs2_add_entry(handle, dentry, inode, 2276 status = ocfs2_add_entry(handle, dentry, inode,
@@ -2284,7 +2288,6 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2284 goto out_commit; 2288 goto out_commit;
2285 } 2289 }
2286 2290
2287 insert_inode_hash(inode);
2288 dentry->d_op = &ocfs2_dentry_ops; 2291 dentry->d_op = &ocfs2_dentry_ops;
2289 d_instantiate(dentry, inode); 2292 d_instantiate(dentry, inode);
2290 status = 0; 2293 status = 0;
@@ -2326,4 +2329,5 @@ const struct inode_operations ocfs2_dir_iops = {
2326 .getxattr = generic_getxattr, 2329 .getxattr = generic_getxattr,
2327 .listxattr = ocfs2_listxattr, 2330 .listxattr = ocfs2_listxattr,
2328 .removexattr = generic_removexattr, 2331 .removexattr = generic_removexattr,
2332 .fiemap = ocfs2_fiemap,
2329}; 2333};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d963d8638709..9362eea7424b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -245,9 +245,11 @@ enum ocfs2_mount_options
245 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ 245 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
246 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ 246 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
247 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ 247 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
248 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ 248 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */
249 OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */ 249 OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access
250 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 250 control lists */
251 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
252 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
251}; 253};
252 254
253#define OCFS2_OSB_SOFT_RO 0x0001 255#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e9431e4a5e7c..1a1a679e51b5 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1202,7 +1202,7 @@ struct ocfs2_local_disk_dqinfo {
1202/* Header of one chunk of a quota file */ 1202/* Header of one chunk of a quota file */
1203struct ocfs2_local_disk_chunk { 1203struct ocfs2_local_disk_chunk {
1204 __le32 dqc_free; /* Number of free entries in the bitmap */ 1204 __le32 dqc_free; /* Number of free entries in the bitmap */
1205 u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding 1205 __u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
1206 * chunk of quota file */ 1206 * chunk of quota file */
1207}; 1207};
1208 1208
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 30967e3f5e43..74db2be75dd6 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -276,7 +276,7 @@ static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
276 spin_unlock(&osb->osb_lock); 276 spin_unlock(&osb->osb_lock);
277} 277}
278 278
279void ocfs2_kref_remove_refcount_tree(struct kref *kref) 279static void ocfs2_kref_remove_refcount_tree(struct kref *kref)
280{ 280{
281 struct ocfs2_refcount_tree *tree = 281 struct ocfs2_refcount_tree *tree =
282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); 282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
@@ -524,23 +524,6 @@ out:
524 return ret; 524 return ret;
525} 525}
526 526
527int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
528 struct ocfs2_refcount_tree **ret_tree,
529 struct buffer_head **ref_bh)
530{
531 int ret;
532 u64 ref_blkno;
533
534 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
535 if (ret) {
536 mlog_errno(ret);
537 return ret;
538 }
539
540 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
541 rw, ret_tree, ref_bh);
542}
543
544void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, 527void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
545 struct ocfs2_refcount_tree *tree, int rw) 528 struct ocfs2_refcount_tree *tree, int rw)
546{ 529{
@@ -969,6 +952,103 @@ out:
969} 952}
970 953
971/* 954/*
955 * Find the end range for a leaf refcount block indicated by
956 * el->l_recs[index].e_blkno.
957 */
958static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
959 struct buffer_head *ref_root_bh,
960 struct ocfs2_extent_block *eb,
961 struct ocfs2_extent_list *el,
962 int index, u32 *cpos_end)
963{
964 int ret, i, subtree_root;
965 u32 cpos;
966 u64 blkno;
967 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
968 struct ocfs2_path *left_path = NULL, *right_path = NULL;
969 struct ocfs2_extent_tree et;
970 struct ocfs2_extent_list *tmp_el;
971
972 if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
973 /*
974 * We have a extent rec after index, so just use the e_cpos
975 * of the next extent rec.
976 */
977 *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
978 return 0;
979 }
980
981 if (!eb || (eb && !eb->h_next_leaf_blk)) {
982 /*
983 * We are the last extent rec, so any high cpos should
984 * be stored in this leaf refcount block.
985 */
986 *cpos_end = UINT_MAX;
987 return 0;
988 }
989
990 /*
991 * If the extent block isn't the last one, we have to find
992 * the subtree root between this extent block and the next
993 * leaf extent block and get the corresponding e_cpos from
994 * the subroot. Otherwise we may corrupt the b-tree.
995 */
996 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
997
998 left_path = ocfs2_new_path_from_et(&et);
999 if (!left_path) {
1000 ret = -ENOMEM;
1001 mlog_errno(ret);
1002 goto out;
1003 }
1004
1005 cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
1006 ret = ocfs2_find_path(ci, left_path, cpos);
1007 if (ret) {
1008 mlog_errno(ret);
1009 goto out;
1010 }
1011
1012 right_path = ocfs2_new_path_from_path(left_path);
1013 if (!right_path) {
1014 ret = -ENOMEM;
1015 mlog_errno(ret);
1016 goto out;
1017 }
1018
1019 ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
1020 if (ret) {
1021 mlog_errno(ret);
1022 goto out;
1023 }
1024
1025 ret = ocfs2_find_path(ci, right_path, cpos);
1026 if (ret) {
1027 mlog_errno(ret);
1028 goto out;
1029 }
1030
1031 subtree_root = ocfs2_find_subtree_root(&et, left_path,
1032 right_path);
1033
1034 tmp_el = left_path->p_node[subtree_root].el;
1035 blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
1036 for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
1037 if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
1038 *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
1039 break;
1040 }
1041 }
1042
1043 BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
1044
1045out:
1046 ocfs2_free_path(left_path);
1047 ocfs2_free_path(right_path);
1048 return ret;
1049}
1050
1051/*
972 * Given a cpos and len, try to find the refcount record which contains cpos. 1052 * Given a cpos and len, try to find the refcount record which contains cpos.
973 * 1. If cpos can be found in one refcount record, return the record. 1053 * 1. If cpos can be found in one refcount record, return the record.
974 * 2. If cpos can't be found, return a fake record which start from cpos 1054 * 2. If cpos can't be found, return a fake record which start from cpos
@@ -983,10 +1063,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
983 struct buffer_head **ret_bh) 1063 struct buffer_head **ret_bh)
984{ 1064{
985 int ret = 0, i, found; 1065 int ret = 0, i, found;
986 u32 low_cpos; 1066 u32 low_cpos, uninitialized_var(cpos_end);
987 struct ocfs2_extent_list *el; 1067 struct ocfs2_extent_list *el;
988 struct ocfs2_extent_rec *tmp, *rec = NULL; 1068 struct ocfs2_extent_rec *rec = NULL;
989 struct ocfs2_extent_block *eb; 1069 struct ocfs2_extent_block *eb = NULL;
990 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; 1070 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
991 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1071 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
992 struct ocfs2_refcount_block *rb = 1072 struct ocfs2_refcount_block *rb =
@@ -1034,12 +1114,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
1034 } 1114 }
1035 } 1115 }
1036 1116
1037 /* adjust len when we have ocfs2_extent_rec after it. */ 1117 if (found) {
1038 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { 1118 ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
1039 tmp = &el->l_recs[i+1]; 1119 eb, el, i, &cpos_end);
1120 if (ret) {
1121 mlog_errno(ret);
1122 goto out;
1123 }
1040 1124
1041 if (le32_to_cpu(tmp->e_cpos) < cpos + len) 1125 if (cpos_end < low_cpos + len)
1042 len = le32_to_cpu(tmp->e_cpos) - cpos; 1126 len = cpos_end - low_cpos;
1043 } 1127 }
1044 1128
1045 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), 1129 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
@@ -1418,7 +1502,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
1418 1502
1419 /* change old and new rl_used accordingly. */ 1503 /* change old and new rl_used accordingly. */
1420 le16_add_cpu(&rl->rl_used, -num_moved); 1504 le16_add_cpu(&rl->rl_used, -num_moved);
1421 new_rl->rl_used = cpu_to_le32(num_moved); 1505 new_rl->rl_used = cpu_to_le16(num_moved);
1422 1506
1423 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), 1507 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
1424 sizeof(struct ocfs2_refcount_rec), 1508 sizeof(struct ocfs2_refcount_rec),
@@ -1797,7 +1881,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1797 recs_need++; 1881 recs_need++;
1798 1882
1799 /* If the leaf block don't have enough record, expand it. */ 1883 /* If the leaf block don't have enough record, expand it. */
1800 if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { 1884 if (le16_to_cpu(rf_list->rl_used) + recs_need >
1885 le16_to_cpu(rf_list->rl_count)) {
1801 struct ocfs2_refcount_rec tmp_rec; 1886 struct ocfs2_refcount_rec tmp_rec;
1802 u64 cpos = le64_to_cpu(orig_rec->r_cpos); 1887 u64 cpos = le64_to_cpu(orig_rec->r_cpos);
1803 len = le32_to_cpu(orig_rec->r_clusters); 1888 len = le32_to_cpu(orig_rec->r_clusters);
@@ -1859,7 +1944,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1859 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); 1944 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
1860 le64_add_cpu(&tail_rec->r_cpos, 1945 le64_add_cpu(&tail_rec->r_cpos,
1861 le32_to_cpu(tail_rec->r_clusters) - len); 1946 le32_to_cpu(tail_rec->r_clusters) - len);
1862 tail_rec->r_clusters = le32_to_cpu(len); 1947 tail_rec->r_clusters = cpu_to_le32(len);
1863 } 1948 }
1864 1949
1865 /* 1950 /*
@@ -3840,8 +3925,7 @@ static int ocfs2_add_refcounted_extent(struct inode *inode,
3840 } 3925 }
3841 3926
3842 ret = ocfs2_insert_extent(handle, et, cpos, 3927 ret = ocfs2_insert_extent(handle, et, cpos,
3843 cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, 3928 ocfs2_clusters_to_blocks(inode->i_sb, p_cluster),
3844 p_cluster)),
3845 num_clusters, ext_flags, meta_ac); 3929 num_clusters, ext_flags, meta_ac);
3846 if (ret) { 3930 if (ret) {
3847 mlog_errno(ret); 3931 mlog_errno(ret);
@@ -4253,8 +4337,8 @@ static int ocfs2_user_path_parent(const char __user *path,
4253 * @new_dentry: target dentry 4337 * @new_dentry: target dentry
4254 * @preserve: if true, preserve all file attributes 4338 * @preserve: if true, preserve all file attributes
4255 */ 4339 */
4256int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, 4340static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4257 struct dentry *new_dentry, bool preserve) 4341 struct dentry *new_dentry, bool preserve)
4258{ 4342{
4259 struct inode *inode = old_dentry->d_inode; 4343 struct inode *inode = old_dentry->d_inode;
4260 int error; 4344 int error;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index ff4c798a5635..da78a2a334fd 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -814,7 +814,7 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
814static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 814static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
815{ 815{
816 dlm_lockspace_t *fsdlm; 816 dlm_lockspace_t *fsdlm;
817 struct ocfs2_live_connection *control; 817 struct ocfs2_live_connection *uninitialized_var(control);
818 int rc = 0; 818 int rc = 0;
819 819
820 BUG_ON(conn == NULL); 820 BUG_ON(conn == NULL);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 14f47d2bfe02..26069917a9f5 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -100,6 +100,8 @@ struct mount_options
100static int ocfs2_parse_options(struct super_block *sb, char *options, 100static int ocfs2_parse_options(struct super_block *sb, char *options,
101 struct mount_options *mopt, 101 struct mount_options *mopt,
102 int is_remount); 102 int is_remount);
103static int ocfs2_check_set_options(struct super_block *sb,
104 struct mount_options *options);
103static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 105static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
104static void ocfs2_put_super(struct super_block *sb); 106static void ocfs2_put_super(struct super_block *sb);
105static int ocfs2_mount_volume(struct super_block *sb); 107static int ocfs2_mount_volume(struct super_block *sb);
@@ -600,7 +602,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
600 602
601 lock_kernel(); 603 lock_kernel();
602 604
603 if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { 605 if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
606 !ocfs2_check_set_options(sb, &parsed_options)) {
604 ret = -EINVAL; 607 ret = -EINVAL;
605 goto out; 608 goto out;
606 } 609 }
@@ -691,8 +694,6 @@ unlock_osb:
691 if (!ret) { 694 if (!ret) {
692 /* Only save off the new mount options in case of a successful 695 /* Only save off the new mount options in case of a successful
693 * remount. */ 696 * remount. */
694 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
695 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
696 osb->s_mount_opt = parsed_options.mount_opt; 697 osb->s_mount_opt = parsed_options.mount_opt;
697 osb->s_atime_quantum = parsed_options.atime_quantum; 698 osb->s_atime_quantum = parsed_options.atime_quantum;
698 osb->preferred_slot = parsed_options.slot; 699 osb->preferred_slot = parsed_options.slot;
@@ -701,6 +702,10 @@ unlock_osb:
701 702
702 if (!ocfs2_is_hard_readonly(osb)) 703 if (!ocfs2_is_hard_readonly(osb))
703 ocfs2_set_journal_params(osb); 704 ocfs2_set_journal_params(osb);
705
706 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
707 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
708 MS_POSIXACL : 0);
704 } 709 }
705out: 710out:
706 unlock_kernel(); 711 unlock_kernel();
@@ -1011,31 +1016,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1011 brelse(bh); 1016 brelse(bh);
1012 bh = NULL; 1017 bh = NULL;
1013 1018
1014 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) 1019 if (!ocfs2_check_set_options(sb, &parsed_options)) {
1015 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1020 status = -EINVAL;
1016 1021 goto read_super_error;
1022 }
1017 osb->s_mount_opt = parsed_options.mount_opt; 1023 osb->s_mount_opt = parsed_options.mount_opt;
1018 osb->s_atime_quantum = parsed_options.atime_quantum; 1024 osb->s_atime_quantum = parsed_options.atime_quantum;
1019 osb->preferred_slot = parsed_options.slot; 1025 osb->preferred_slot = parsed_options.slot;
1020 osb->osb_commit_interval = parsed_options.commit_interval; 1026 osb->osb_commit_interval = parsed_options.commit_interval;
1021 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1027 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
1022 osb->local_alloc_bits = osb->local_alloc_default_bits; 1028 osb->local_alloc_bits = osb->local_alloc_default_bits;
1023 if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
1024 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1025 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1026 status = -EINVAL;
1027 mlog(ML_ERROR, "User quotas were requested, but this "
1028 "filesystem does not have the feature enabled.\n");
1029 goto read_super_error;
1030 }
1031 if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1032 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1033 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1034 status = -EINVAL;
1035 mlog(ML_ERROR, "Group quotas were requested, but this "
1036 "filesystem does not have the feature enabled.\n");
1037 goto read_super_error;
1038 }
1039 1029
1040 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1030 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1041 if (status) 1031 if (status)
@@ -1245,6 +1235,40 @@ static struct file_system_type ocfs2_fs_type = {
1245 .next = NULL 1235 .next = NULL
1246}; 1236};
1247 1237
1238static int ocfs2_check_set_options(struct super_block *sb,
1239 struct mount_options *options)
1240{
1241 if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
1242 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1243 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1244 mlog(ML_ERROR, "User quotas were requested, but this "
1245 "filesystem does not have the feature enabled.\n");
1246 return 0;
1247 }
1248 if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA &&
1249 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1250 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1251 mlog(ML_ERROR, "Group quotas were requested, but this "
1252 "filesystem does not have the feature enabled.\n");
1253 return 0;
1254 }
1255 if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL &&
1256 !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) {
1257 mlog(ML_ERROR, "ACL support requested but extended attributes "
1258 "feature is not enabled\n");
1259 return 0;
1260 }
1261 /* No ACL setting specified? Use XATTR feature... */
1262 if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL |
1263 OCFS2_MOUNT_NO_POSIX_ACL))) {
1264 if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR))
1265 options->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1266 else
1267 options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1268 }
1269 return 1;
1270}
1271
1248static int ocfs2_parse_options(struct super_block *sb, 1272static int ocfs2_parse_options(struct super_block *sb,
1249 char *options, 1273 char *options,
1250 struct mount_options *mopt, 1274 struct mount_options *mopt,
@@ -1392,40 +1416,19 @@ static int ocfs2_parse_options(struct super_block *sb,
1392 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1416 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
1393 break; 1417 break;
1394 case Opt_usrquota: 1418 case Opt_usrquota:
1395 /* We check only on remount, otherwise features
1396 * aren't yet initialized. */
1397 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1398 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1399 mlog(ML_ERROR, "User quota requested but "
1400 "filesystem feature is not set\n");
1401 status = 0;
1402 goto bail;
1403 }
1404 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; 1419 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
1405 break; 1420 break;
1406 case Opt_grpquota: 1421 case Opt_grpquota:
1407 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1408 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1409 mlog(ML_ERROR, "Group quota requested but "
1410 "filesystem feature is not set\n");
1411 status = 0;
1412 goto bail;
1413 }
1414 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; 1422 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1415 break; 1423 break;
1416#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1417 case Opt_acl: 1424 case Opt_acl:
1418 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1425 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1426 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
1419 break; 1427 break;
1420 case Opt_noacl: 1428 case Opt_noacl:
1429 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1421 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1430 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1422 break; 1431 break;
1423#else
1424 case Opt_acl:
1425 case Opt_noacl:
1426 printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
1427 break;
1428#endif
1429 default: 1432 default:
1430 mlog(ML_ERROR, 1433 mlog(ML_ERROR,
1431 "Unrecognized mount option \"%s\" " 1434 "Unrecognized mount option \"%s\" "
@@ -1502,12 +1505,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1502 if (opts & OCFS2_MOUNT_INODE64) 1505 if (opts & OCFS2_MOUNT_INODE64)
1503 seq_printf(s, ",inode64"); 1506 seq_printf(s, ",inode64");
1504 1507
1505#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1506 if (opts & OCFS2_MOUNT_POSIX_ACL) 1508 if (opts & OCFS2_MOUNT_POSIX_ACL)
1507 seq_printf(s, ",acl"); 1509 seq_printf(s, ",acl");
1508 else 1510 else
1509 seq_printf(s, ",noacl"); 1511 seq_printf(s, ",noacl");
1510#endif
1511 1512
1512 return 0; 1513 return 0;
1513} 1514}
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index e3421030a69f..49b133ccbf11 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -163,6 +163,7 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
163 .getxattr = generic_getxattr, 163 .getxattr = generic_getxattr,
164 .listxattr = ocfs2_listxattr, 164 .listxattr = ocfs2_listxattr,
165 .removexattr = generic_removexattr, 165 .removexattr = generic_removexattr,
166 .fiemap = ocfs2_fiemap,
166}; 167};
167const struct inode_operations ocfs2_fast_symlink_inode_operations = { 168const struct inode_operations ocfs2_fast_symlink_inode_operations = {
168 .readlink = ocfs2_readlink, 169 .readlink = ocfs2_readlink,
@@ -174,4 +175,5 @@ const struct inode_operations ocfs2_fast_symlink_inode_operations = {
174 .getxattr = generic_getxattr, 175 .getxattr = generic_getxattr,
175 .listxattr = ocfs2_listxattr, 176 .listxattr = ocfs2_listxattr,
176 .removexattr = generic_removexattr, 177 .removexattr = generic_removexattr,
178 .fiemap = ocfs2_fiemap,
177}; 179};
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fe3419068df2..8fc6fb071c6d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -98,10 +98,8 @@ static struct ocfs2_xattr_def_value_root def_xv = {
98 98
99struct xattr_handler *ocfs2_xattr_handlers[] = { 99struct xattr_handler *ocfs2_xattr_handlers[] = {
100 &ocfs2_xattr_user_handler, 100 &ocfs2_xattr_user_handler,
101#ifdef CONFIG_OCFS2_FS_POSIX_ACL
102 &ocfs2_xattr_acl_access_handler, 101 &ocfs2_xattr_acl_access_handler,
103 &ocfs2_xattr_acl_default_handler, 102 &ocfs2_xattr_acl_default_handler,
104#endif
105 &ocfs2_xattr_trusted_handler, 103 &ocfs2_xattr_trusted_handler,
106 &ocfs2_xattr_security_handler, 104 &ocfs2_xattr_security_handler,
107 NULL 105 NULL
@@ -109,12 +107,10 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
109 107
110static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 108static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
111 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 109 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
112#ifdef CONFIG_OCFS2_FS_POSIX_ACL
113 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] 110 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
114 = &ocfs2_xattr_acl_access_handler, 111 = &ocfs2_xattr_acl_access_handler,
115 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] 112 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
116 = &ocfs2_xattr_acl_default_handler, 113 = &ocfs2_xattr_acl_default_handler,
117#endif
118 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 114 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
119 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, 115 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
120}; 116};
@@ -205,8 +201,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
205 int offset, 201 int offset,
206 struct ocfs2_xattr_value_root **xv, 202 struct ocfs2_xattr_value_root **xv,
207 struct buffer_head **bh); 203 struct buffer_head **bh);
208static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
209 const void *value, size_t size, int flags);
210 204
211static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 205static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
212{ 206{
@@ -6066,7 +6060,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6066 * to the extent block, so just calculate a maximum record num. 6060 * to the extent block, so just calculate a maximum record num.
6067 */ 6061 */
6068 if (!xv->xr_list.l_tree_depth) 6062 if (!xv->xr_list.l_tree_depth)
6069 *num_recs += xv->xr_list.l_next_free_rec; 6063 *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6070 else 6064 else
6071 *num_recs += ocfs2_clusters_for_bytes(sb, 6065 *num_recs += ocfs2_clusters_for_bytes(sb,
6072 XATTR_SIZE_MAX); 6066 XATTR_SIZE_MAX);
@@ -6978,9 +6972,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
6978 6972
6979 ret = ocfs2_init_security_get(inode, dir, &si); 6973 ret = ocfs2_init_security_get(inode, dir, &si);
6980 if (!ret) { 6974 if (!ret) {
6981 ret = ocfs2_xattr_security_set(inode, si.name, 6975 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
6982 si.value, si.value_len, 6976 si.name, si.value, si.value_len,
6983 XATTR_CREATE); 6977 XATTR_CREATE);
6984 if (ret) { 6978 if (ret) {
6985 mlog_errno(ret); 6979 mlog_errno(ret);
6986 goto leave; 6980 goto leave;
@@ -7008,9 +7002,9 @@ leave:
7008/* 7002/*
7009 * 'security' attributes support 7003 * 'security' attributes support
7010 */ 7004 */
7011static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, 7005static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7012 size_t list_size, const char *name, 7006 size_t list_size, const char *name,
7013 size_t name_len) 7007 size_t name_len, int type)
7014{ 7008{
7015 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; 7009 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7016 const size_t total_len = prefix_len + name_len + 1; 7010 const size_t total_len = prefix_len + name_len + 1;
@@ -7023,23 +7017,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
7023 return total_len; 7017 return total_len;
7024} 7018}
7025 7019
7026static int ocfs2_xattr_security_get(struct inode *inode, const char *name, 7020static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7027 void *buffer, size_t size) 7021 void *buffer, size_t size, int type)
7028{ 7022{
7029 if (strcmp(name, "") == 0) 7023 if (strcmp(name, "") == 0)
7030 return -EINVAL; 7024 return -EINVAL;
7031 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, 7025 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7032 buffer, size); 7026 name, buffer, size);
7033} 7027}
7034 7028
7035static int ocfs2_xattr_security_set(struct inode *inode, const char *name, 7029static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7036 const void *value, size_t size, int flags) 7030 const void *value, size_t size, int flags, int type)
7037{ 7031{
7038 if (strcmp(name, "") == 0) 7032 if (strcmp(name, "") == 0)
7039 return -EINVAL; 7033 return -EINVAL;
7040 7034
7041 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, 7035 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7042 size, flags); 7036 name, value, size, flags);
7043} 7037}
7044 7038
7045int ocfs2_init_security_get(struct inode *inode, 7039int ocfs2_init_security_get(struct inode *inode,
@@ -7076,9 +7070,9 @@ struct xattr_handler ocfs2_xattr_security_handler = {
7076/* 7070/*
7077 * 'trusted' attributes support 7071 * 'trusted' attributes support
7078 */ 7072 */
7079static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 7073static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7080 size_t list_size, const char *name, 7074 size_t list_size, const char *name,
7081 size_t name_len) 7075 size_t name_len, int type)
7082{ 7076{
7083 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; 7077 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7084 const size_t total_len = prefix_len + name_len + 1; 7078 const size_t total_len = prefix_len + name_len + 1;
@@ -7091,23 +7085,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
7091 return total_len; 7085 return total_len;
7092} 7086}
7093 7087
7094static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, 7088static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7095 void *buffer, size_t size) 7089 void *buffer, size_t size, int type)
7096{ 7090{
7097 if (strcmp(name, "") == 0) 7091 if (strcmp(name, "") == 0)
7098 return -EINVAL; 7092 return -EINVAL;
7099 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, 7093 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7100 buffer, size); 7094 name, buffer, size);
7101} 7095}
7102 7096
7103static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, 7097static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7104 const void *value, size_t size, int flags) 7098 const void *value, size_t size, int flags, int type)
7105{ 7099{
7106 if (strcmp(name, "") == 0) 7100 if (strcmp(name, "") == 0)
7107 return -EINVAL; 7101 return -EINVAL;
7108 7102
7109 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, 7103 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7110 size, flags); 7104 name, value, size, flags);
7111} 7105}
7112 7106
7113struct xattr_handler ocfs2_xattr_trusted_handler = { 7107struct xattr_handler ocfs2_xattr_trusted_handler = {
@@ -7120,13 +7114,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
7120/* 7114/*
7121 * 'user' attributes support 7115 * 'user' attributes support
7122 */ 7116 */
7123static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, 7117static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7124 size_t list_size, const char *name, 7118 size_t list_size, const char *name,
7125 size_t name_len) 7119 size_t name_len, int type)
7126{ 7120{
7127 const size_t prefix_len = XATTR_USER_PREFIX_LEN; 7121 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7128 const size_t total_len = prefix_len + name_len + 1; 7122 const size_t total_len = prefix_len + name_len + 1;
7129 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7123 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7130 7124
7131 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7125 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7132 return 0; 7126 return 0;
@@ -7139,31 +7133,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
7139 return total_len; 7133 return total_len;
7140} 7134}
7141 7135
7142static int ocfs2_xattr_user_get(struct inode *inode, const char *name, 7136static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7143 void *buffer, size_t size) 7137 void *buffer, size_t size, int type)
7144{ 7138{
7145 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7139 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7146 7140
7147 if (strcmp(name, "") == 0) 7141 if (strcmp(name, "") == 0)
7148 return -EINVAL; 7142 return -EINVAL;
7149 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7143 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7150 return -EOPNOTSUPP; 7144 return -EOPNOTSUPP;
7151 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, 7145 return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7152 buffer, size); 7146 buffer, size);
7153} 7147}
7154 7148
7155static int ocfs2_xattr_user_set(struct inode *inode, const char *name, 7149static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7156 const void *value, size_t size, int flags) 7150 const void *value, size_t size, int flags, int type)
7157{ 7151{
7158 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7152 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7159 7153
7160 if (strcmp(name, "") == 0) 7154 if (strcmp(name, "") == 0)
7161 return -EINVAL; 7155 return -EINVAL;
7162 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) 7156 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7163 return -EOPNOTSUPP; 7157 return -EOPNOTSUPP;
7164 7158
7165 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, 7159 return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7166 size, flags); 7160 name, value, size, flags);
7167} 7161}
7168 7162
7169struct xattr_handler ocfs2_xattr_user_handler = { 7163struct xattr_handler ocfs2_xattr_user_handler = {
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 08e36389f56d..abd72a47f520 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -40,10 +40,8 @@ struct ocfs2_security_xattr_info {
40extern struct xattr_handler ocfs2_xattr_user_handler; 40extern struct xattr_handler ocfs2_xattr_user_handler;
41extern struct xattr_handler ocfs2_xattr_trusted_handler; 41extern struct xattr_handler ocfs2_xattr_trusted_handler;
42extern struct xattr_handler ocfs2_xattr_security_handler; 42extern struct xattr_handler ocfs2_xattr_security_handler;
43#ifdef CONFIG_OCFS2_FS_POSIX_ACL
44extern struct xattr_handler ocfs2_xattr_acl_access_handler; 43extern struct xattr_handler ocfs2_xattr_acl_access_handler;
45extern struct xattr_handler ocfs2_xattr_acl_default_handler; 44extern struct xattr_handler ocfs2_xattr_acl_default_handler;
46#endif
47extern struct xattr_handler *ocfs2_xattr_handlers[]; 45extern struct xattr_handler *ocfs2_xattr_handlers[];
48 46
49ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); 47ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/open.c b/fs/open.c
index b4b31d277f3a..040cef72bc00 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,9 @@
30#include <linux/audit.h> 30#include <linux/audit.h>
31#include <linux/falloc.h> 31#include <linux/falloc.h>
32#include <linux/fs_struct.h> 32#include <linux/fs_struct.h>
33#include <linux/ima.h>
34
35#include "internal.h"
33 36
34int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 37int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
35{ 38{
@@ -818,15 +821,14 @@ static inline int __get_file_write_access(struct inode *inode,
818} 821}
819 822
820static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 823static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
821 int flags, struct file *f, 824 struct file *f,
822 int (*open)(struct inode *, struct file *), 825 int (*open)(struct inode *, struct file *),
823 const struct cred *cred) 826 const struct cred *cred)
824{ 827{
825 struct inode *inode; 828 struct inode *inode;
826 int error; 829 int error;
827 830
828 f->f_flags = flags; 831 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
829 f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
830 FMODE_PREAD | FMODE_PWRITE; 832 FMODE_PREAD | FMODE_PWRITE;
831 inode = dentry->d_inode; 833 inode = dentry->d_inode;
832 if (f->f_mode & FMODE_WRITE) { 834 if (f->f_mode & FMODE_WRITE) {
@@ -855,6 +857,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
855 if (error) 857 if (error)
856 goto cleanup_all; 858 goto cleanup_all;
857 } 859 }
860 ima_counts_get(f);
858 861
859 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 862 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
860 863
@@ -926,7 +929,6 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
926 if (IS_ERR(dentry)) 929 if (IS_ERR(dentry))
927 goto out_err; 930 goto out_err;
928 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 931 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
929 nd->intent.open.flags - 1,
930 nd->intent.open.file, 932 nd->intent.open.file,
931 open, cred); 933 open, cred);
932out: 934out:
@@ -945,7 +947,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
945 * 947 *
946 * Note that this function destroys the original nameidata 948 * Note that this function destroys the original nameidata
947 */ 949 */
948struct file *nameidata_to_filp(struct nameidata *nd, int flags) 950struct file *nameidata_to_filp(struct nameidata *nd)
949{ 951{
950 const struct cred *cred = current_cred(); 952 const struct cred *cred = current_cred();
951 struct file *filp; 953 struct file *filp;
@@ -954,7 +956,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
954 filp = nd->intent.open.file; 956 filp = nd->intent.open.file;
955 /* Has the filesystem initialised the file for us? */ 957 /* Has the filesystem initialised the file for us? */
956 if (filp->f_path.dentry == NULL) 958 if (filp->f_path.dentry == NULL)
957 filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, 959 filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
958 NULL, cred); 960 NULL, cred);
959 else 961 else
960 path_put(&nd->path); 962 path_put(&nd->path);
@@ -993,7 +995,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
993 return ERR_PTR(error); 995 return ERR_PTR(error);
994 } 996 }
995 997
996 return __dentry_open(dentry, mnt, flags, f, NULL, cred); 998 f->f_flags = flags;
999 return __dentry_open(dentry, mnt, f, NULL, cred);
997} 1000}
998EXPORT_SYMBOL(dentry_open); 1001EXPORT_SYMBOL(dentry_open);
999 1002
diff --git a/fs/pipe.c b/fs/pipe.c
index ae17d026aaa3..37ba29ff3158 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
906} 906}
907 907
908static struct vfsmount *pipe_mnt __read_mostly; 908static struct vfsmount *pipe_mnt __read_mostly;
909static int pipefs_delete_dentry(struct dentry *dentry)
910{
911 /*
912 * At creation time, we pretended this dentry was hashed
913 * (by clearing DCACHE_UNHASHED bit in d_flags)
914 * At delete time, we restore the truth : not hashed.
915 * (so that dput() can proceed correctly)
916 */
917 dentry->d_flags |= DCACHE_UNHASHED;
918 return 0;
919}
920 909
921/* 910/*
922 * pipefs_dname() is called from d_path(). 911 * pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
928} 917}
929 918
930static const struct dentry_operations pipefs_dentry_operations = { 919static const struct dentry_operations pipefs_dentry_operations = {
931 .d_delete = pipefs_delete_dentry,
932 .d_dname = pipefs_dname, 920 .d_dname = pipefs_dname,
933}; 921};
934 922
@@ -974,7 +962,7 @@ struct file *create_write_pipe(int flags)
974 int err; 962 int err;
975 struct inode *inode; 963 struct inode *inode;
976 struct file *f; 964 struct file *f;
977 struct dentry *dentry; 965 struct path path;
978 struct qstr name = { .name = "" }; 966 struct qstr name = { .name = "" };
979 967
980 err = -ENFILE; 968 err = -ENFILE;
@@ -983,21 +971,16 @@ struct file *create_write_pipe(int flags)
983 goto err; 971 goto err;
984 972
985 err = -ENOMEM; 973 err = -ENOMEM;
986 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 974 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
987 if (!dentry) 975 if (!path.dentry)
988 goto err_inode; 976 goto err_inode;
977 path.mnt = mntget(pipe_mnt);
989 978
990 dentry->d_op = &pipefs_dentry_operations; 979 path.dentry->d_op = &pipefs_dentry_operations;
991 /* 980 d_instantiate(path.dentry, inode);
992 * We dont want to publish this dentry into global dentry hash table.
993 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
994 * This permits a working /proc/$pid/fd/XXX on pipes
995 */
996 dentry->d_flags &= ~DCACHE_UNHASHED;
997 d_instantiate(dentry, inode);
998 981
999 err = -ENFILE; 982 err = -ENFILE;
1000 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); 983 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1001 if (!f) 984 if (!f)
1002 goto err_dentry; 985 goto err_dentry;
1003 f->f_mapping = inode->i_mapping; 986 f->f_mapping = inode->i_mapping;
@@ -1009,7 +992,7 @@ struct file *create_write_pipe(int flags)
1009 992
1010 err_dentry: 993 err_dentry:
1011 free_pipe_info(inode); 994 free_pipe_info(inode);
1012 dput(dentry); 995 path_put(&path);
1013 return ERR_PTR(err); 996 return ERR_PTR(err);
1014 997
1015 err_inode: 998 err_inode:
@@ -1028,20 +1011,14 @@ void free_write_pipe(struct file *f)
1028 1011
1029struct file *create_read_pipe(struct file *wrf, int flags) 1012struct file *create_read_pipe(struct file *wrf, int flags)
1030{ 1013{
1031 struct file *f = get_empty_filp(); 1014 /* Grab pipe from the writer */
1015 struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
1016 &read_pipefifo_fops);
1032 if (!f) 1017 if (!f)
1033 return ERR_PTR(-ENFILE); 1018 return ERR_PTR(-ENFILE);
1034 1019
1035 /* Grab pipe from the writer */
1036 f->f_path = wrf->f_path;
1037 path_get(&wrf->f_path); 1020 path_get(&wrf->f_path);
1038 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1039
1040 f->f_pos = 0;
1041 f->f_flags = O_RDONLY | (flags & O_NONBLOCK); 1021 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1042 f->f_op = &read_pipefifo_fops;
1043 f->f_mode = FMODE_READ;
1044 f->f_version = 0;
1045 1022
1046 return f; 1023 return f;
1047} 1024}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 4badde179b18..13b5d0708175 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,13 +134,16 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
134 * simple bit tests. 134 * simple bit tests.
135 */ 135 */
136static const char *task_state_array[] = { 136static const char *task_state_array[] = {
137 "R (running)", /* 0 */ 137 "R (running)", /* 0 */
138 "S (sleeping)", /* 1 */ 138 "S (sleeping)", /* 1 */
139 "D (disk sleep)", /* 2 */ 139 "D (disk sleep)", /* 2 */
140 "T (stopped)", /* 4 */ 140 "T (stopped)", /* 4 */
141 "T (tracing stop)", /* 8 */ 141 "t (tracing stop)", /* 8 */
142 "Z (zombie)", /* 16 */ 142 "Z (zombie)", /* 16 */
143 "X (dead)" /* 32 */ 143 "X (dead)", /* 32 */
144 "x (dead)", /* 64 */
145 "K (wakekill)", /* 128 */
146 "W (waking)", /* 256 */
144}; 147};
145 148
146static inline const char *get_task_state(struct task_struct *tsk) 149static inline const char *get_task_state(struct task_struct *tsk)
@@ -148,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
148 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; 151 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
149 const char **p = &task_state_array[0]; 152 const char **p = &task_state_array[0];
150 153
154 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
155
151 while (state) { 156 while (state) {
152 p++; 157 p++;
153 state >>= 1; 158 state >>= 1;
@@ -322,94 +327,6 @@ static inline void task_context_switch_counts(struct seq_file *m,
322 p->nivcsw); 327 p->nivcsw);
323} 328}
324 329
325#ifdef CONFIG_MMU
326
327struct stack_stats {
328 struct vm_area_struct *vma;
329 unsigned long startpage;
330 unsigned long usage;
331};
332
333static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
334 unsigned long end, struct mm_walk *walk)
335{
336 struct stack_stats *ss = walk->private;
337 struct vm_area_struct *vma = ss->vma;
338 pte_t *pte, ptent;
339 spinlock_t *ptl;
340 int ret = 0;
341
342 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
343 for (; addr != end; pte++, addr += PAGE_SIZE) {
344 ptent = *pte;
345
346#ifdef CONFIG_STACK_GROWSUP
347 if (pte_present(ptent) || is_swap_pte(ptent))
348 ss->usage = addr - ss->startpage + PAGE_SIZE;
349#else
350 if (pte_present(ptent) || is_swap_pte(ptent)) {
351 ss->usage = ss->startpage - addr + PAGE_SIZE;
352 pte++;
353 ret = 1;
354 break;
355 }
356#endif
357 }
358 pte_unmap_unlock(pte - 1, ptl);
359 cond_resched();
360 return ret;
361}
362
363static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
364 struct task_struct *task)
365{
366 struct stack_stats ss;
367 struct mm_walk stack_walk = {
368 .pmd_entry = stack_usage_pte_range,
369 .mm = vma->vm_mm,
370 .private = &ss,
371 };
372
373 if (!vma->vm_mm || is_vm_hugetlb_page(vma))
374 return 0;
375
376 ss.vma = vma;
377 ss.startpage = task->stack_start & PAGE_MASK;
378 ss.usage = 0;
379
380#ifdef CONFIG_STACK_GROWSUP
381 walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
382 &stack_walk);
383#else
384 walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
385 &stack_walk);
386#endif
387 return ss.usage;
388}
389
390static inline void task_show_stack_usage(struct seq_file *m,
391 struct task_struct *task)
392{
393 struct vm_area_struct *vma;
394 struct mm_struct *mm = get_task_mm(task);
395
396 if (mm) {
397 down_read(&mm->mmap_sem);
398 vma = find_vma(mm, task->stack_start);
399 if (vma)
400 seq_printf(m, "Stack usage:\t%lu kB\n",
401 get_stack_usage_in_bytes(vma, task) >> 10);
402
403 up_read(&mm->mmap_sem);
404 mmput(mm);
405 }
406}
407#else
408static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
409{
410}
411#endif /* CONFIG_MMU */
412
413static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
414{ 331{
415 seq_printf(m, "Cpus_allowed:\t"); 332 seq_printf(m, "Cpus_allowed:\t");
@@ -440,7 +357,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
440 task_show_regs(m, task); 357 task_show_regs(m, task);
441#endif 358#endif
442 task_context_switch_counts(m, task); 359 task_context_switch_counts(m, task);
443 task_show_stack_usage(m, task);
444 return 0; 360 return 0;
445} 361}
446 362
diff --git a/fs/proc/base.c b/fs/proc/base.c
index af643b5aefe8..e42bbd843ed1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1265,6 +1265,72 @@ static const struct file_operations proc_pid_sched_operations = {
1265 1265
1266#endif 1266#endif
1267 1267
1268static ssize_t comm_write(struct file *file, const char __user *buf,
1269 size_t count, loff_t *offset)
1270{
1271 struct inode *inode = file->f_path.dentry->d_inode;
1272 struct task_struct *p;
1273 char buffer[TASK_COMM_LEN];
1274
1275 memset(buffer, 0, sizeof(buffer));
1276 if (count > sizeof(buffer) - 1)
1277 count = sizeof(buffer) - 1;
1278 if (copy_from_user(buffer, buf, count))
1279 return -EFAULT;
1280
1281 p = get_proc_task(inode);
1282 if (!p)
1283 return -ESRCH;
1284
1285 if (same_thread_group(current, p))
1286 set_task_comm(p, buffer);
1287 else
1288 count = -EINVAL;
1289
1290 put_task_struct(p);
1291
1292 return count;
1293}
1294
1295static int comm_show(struct seq_file *m, void *v)
1296{
1297 struct inode *inode = m->private;
1298 struct task_struct *p;
1299
1300 p = get_proc_task(inode);
1301 if (!p)
1302 return -ESRCH;
1303
1304 task_lock(p);
1305 seq_printf(m, "%s\n", p->comm);
1306 task_unlock(p);
1307
1308 put_task_struct(p);
1309
1310 return 0;
1311}
1312
1313static int comm_open(struct inode *inode, struct file *filp)
1314{
1315 int ret;
1316
1317 ret = single_open(filp, comm_show, NULL);
1318 if (!ret) {
1319 struct seq_file *m = filp->private_data;
1320
1321 m->private = inode;
1322 }
1323 return ret;
1324}
1325
1326static const struct file_operations proc_pid_set_comm_operations = {
1327 .open = comm_open,
1328 .read = seq_read,
1329 .write = comm_write,
1330 .llseek = seq_lseek,
1331 .release = single_release,
1332};
1333
1268/* 1334/*
1269 * We added or removed a vma mapping the executable. The vmas are only mapped 1335 * We added or removed a vma mapping the executable. The vmas are only mapped
1270 * during exec and are not mapped with the mmap system call. 1336 * during exec and are not mapped with the mmap system call.
@@ -1353,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1353 goto out; 1419 goto out;
1354 1420
1355 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1356 nd->last_type = LAST_BIND;
1357out: 1422out:
1358 return ERR_PTR(error); 1423 return ERR_PTR(error);
1359} 1424}
@@ -2200,7 +2265,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = {
2200 2265
2201#endif 2266#endif
2202 2267
2203#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2268#ifdef CONFIG_ELF_CORE
2204static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2269static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2205 size_t count, loff_t *ppos) 2270 size_t count, loff_t *ppos)
2206{ 2271{
@@ -2504,6 +2569,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2504#ifdef CONFIG_SCHED_DEBUG 2569#ifdef CONFIG_SCHED_DEBUG
2505 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2570 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2506#endif 2571#endif
2572 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2507#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2573#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2508 INF("syscall", S_IRUSR, proc_pid_syscall), 2574 INF("syscall", S_IRUSR, proc_pid_syscall),
2509#endif 2575#endif
@@ -2556,7 +2622,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2556#ifdef CONFIG_FAULT_INJECTION 2622#ifdef CONFIG_FAULT_INJECTION
2557 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2623 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2558#endif 2624#endif
2559#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) 2625#ifdef CONFIG_ELF_CORE
2560 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2626 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2561#endif 2627#endif
2562#ifdef CONFIG_TASK_IO_ACCOUNTING 2628#ifdef CONFIG_TASK_IO_ACCOUNTING
@@ -2838,6 +2904,7 @@ static const struct pid_entry tid_base_stuff[] = {
2838#ifdef CONFIG_SCHED_DEBUG 2904#ifdef CONFIG_SCHED_DEBUG
2839 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2905 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2840#endif 2906#endif
2907 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2841#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2908#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2842 INF("syscall", S_IRUSR, proc_pid_syscall), 2909 INF("syscall", S_IRUSR, proc_pid_syscall),
2843#endif 2910#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index fa678abc9db1..480cb1065eec 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -429,7 +429,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
429 unsigned int ino; 429 unsigned int ino;
430 430
431 ino = de->low_ino; 431 ino = de->low_ino;
432 de_get(de); 432 pde_get(de);
433 spin_unlock(&proc_subdir_lock); 433 spin_unlock(&proc_subdir_lock);
434 error = -EINVAL; 434 error = -EINVAL;
435 inode = proc_get_inode(dir->i_sb, ino, de); 435 inode = proc_get_inode(dir->i_sb, ino, de);
@@ -445,7 +445,7 @@ out_unlock:
445 return NULL; 445 return NULL;
446 } 446 }
447 if (de) 447 if (de)
448 de_put(de); 448 pde_put(de);
449 return ERR_PTR(error); 449 return ERR_PTR(error);
450} 450}
451 451
@@ -509,17 +509,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
509 struct proc_dir_entry *next; 509 struct proc_dir_entry *next;
510 510
511 /* filldir passes info to user space */ 511 /* filldir passes info to user space */
512 de_get(de); 512 pde_get(de);
513 spin_unlock(&proc_subdir_lock); 513 spin_unlock(&proc_subdir_lock);
514 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 514 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
515 de->low_ino, de->mode >> 12) < 0) { 515 de->low_ino, de->mode >> 12) < 0) {
516 de_put(de); 516 pde_put(de);
517 goto out; 517 goto out;
518 } 518 }
519 spin_lock(&proc_subdir_lock); 519 spin_lock(&proc_subdir_lock);
520 filp->f_pos++; 520 filp->f_pos++;
521 next = de->next; 521 next = de->next;
522 de_put(de); 522 pde_put(de);
523 de = next; 523 de = next;
524 } while (de); 524 } while (de);
525 spin_unlock(&proc_subdir_lock); 525 spin_unlock(&proc_subdir_lock);
@@ -763,7 +763,7 @@ out:
763 return NULL; 763 return NULL;
764} 764}
765 765
766void free_proc_entry(struct proc_dir_entry *de) 766static void free_proc_entry(struct proc_dir_entry *de)
767{ 767{
768 unsigned int ino = de->low_ino; 768 unsigned int ino = de->low_ino;
769 769
@@ -777,6 +777,12 @@ void free_proc_entry(struct proc_dir_entry *de)
777 kfree(de); 777 kfree(de);
778} 778}
779 779
780void pde_put(struct proc_dir_entry *pde)
781{
782 if (atomic_dec_and_test(&pde->count))
783 free_proc_entry(pde);
784}
785
780/* 786/*
781 * Remove a /proc entry and free it if it's not currently in use. 787 * Remove a /proc entry and free it if it's not currently in use.
782 */ 788 */
@@ -845,6 +851,5 @@ continue_removing:
845 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " 851 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
846 "'%s/%s', leaking at least '%s'\n", __func__, 852 "'%s/%s', leaking at least '%s'\n", __func__,
847 de->parent->name, de->name, de->subdir->name); 853 de->parent->name, de->name, de->subdir->name);
848 if (atomic_dec_and_test(&de->count)) 854 pde_put(de);
849 free_proc_entry(de);
850} 855}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d78ade305541..445a02bcaab3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,29 +24,6 @@
24 24
25#include "internal.h" 25#include "internal.h"
26 26
27struct proc_dir_entry *de_get(struct proc_dir_entry *de)
28{
29 atomic_inc(&de->count);
30 return de;
31}
32
33/*
34 * Decrements the use count and checks for deferred deletion.
35 */
36void de_put(struct proc_dir_entry *de)
37{
38 if (!atomic_read(&de->count)) {
39 printk("de_put: entry %s already free!\n", de->name);
40 return;
41 }
42
43 if (atomic_dec_and_test(&de->count))
44 free_proc_entry(de);
45}
46
47/*
48 * Decrement the use count of the proc_dir_entry.
49 */
50static void proc_delete_inode(struct inode *inode) 27static void proc_delete_inode(struct inode *inode)
51{ 28{
52 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
@@ -59,7 +36,7 @@ static void proc_delete_inode(struct inode *inode)
59 /* Let go of any associated proc directory entry */ 36 /* Let go of any associated proc directory entry */
60 de = PROC_I(inode)->pde; 37 de = PROC_I(inode)->pde;
61 if (de) 38 if (de)
62 de_put(de); 39 pde_put(de);
63 if (PROC_I(inode)->sysctl) 40 if (PROC_I(inode)->sysctl)
64 sysctl_head_put(PROC_I(inode)->sysctl); 41 sysctl_head_put(PROC_I(inode)->sysctl);
65 clear_inode(inode); 42 clear_inode(inode);
@@ -480,7 +457,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
480 } 457 }
481 unlock_new_inode(inode); 458 unlock_new_inode(inode);
482 } else 459 } else
483 de_put(de); 460 pde_put(de);
484 return inode; 461 return inode;
485} 462}
486 463
@@ -495,7 +472,7 @@ int proc_fill_super(struct super_block *s)
495 s->s_op = &proc_sops; 472 s->s_op = &proc_sops;
496 s->s_time_gran = 1; 473 s->s_time_gran = 1;
497 474
498 de_get(&proc_root); 475 pde_get(&proc_root);
499 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); 476 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
500 if (!root_inode) 477 if (!root_inode)
501 goto out_no_root; 478 goto out_no_root;
@@ -509,6 +486,6 @@ int proc_fill_super(struct super_block *s)
509out_no_root: 486out_no_root:
510 printk("proc_read_super: get root inode failed\n"); 487 printk("proc_read_super: get root inode failed\n");
511 iput(root_inode); 488 iput(root_inode);
512 de_put(&proc_root); 489 pde_put(&proc_root);
513 return -ENOMEM; 490 return -ENOMEM;
514} 491}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 753ca37002c8..1f24a3eddd12 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,8 +61,6 @@ extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 61extern const struct file_operations proc_net_operations;
62extern const struct inode_operations proc_net_inode_operations; 62extern const struct inode_operations proc_net_inode_operations;
63 63
64void free_proc_entry(struct proc_dir_entry *de);
65
66void proc_init_inodecache(void); 64void proc_init_inodecache(void);
67 65
68static inline struct pid *proc_pid(struct inode *inode) 66static inline struct pid *proc_pid(struct inode *inode)
@@ -101,8 +99,12 @@ unsigned long task_vsize(struct mm_struct *);
101int task_statm(struct mm_struct *, int *, int *, int *, int *); 99int task_statm(struct mm_struct *, int *, int *, int *, int *);
102void task_mem(struct seq_file *, struct mm_struct *); 100void task_mem(struct seq_file *, struct mm_struct *);
103 101
104struct proc_dir_entry *de_get(struct proc_dir_entry *de); 102static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
105void de_put(struct proc_dir_entry *de); 103{
104 atomic_inc(&pde->count);
105 return pde;
106}
107void pde_put(struct proc_dir_entry *pde);
106 108
107extern struct vfsmount *proc_mnt; 109extern struct vfsmount *proc_mnt;
108int proc_fill_super(struct super_block *); 110int proc_fill_super(struct super_block *);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5033ce0d254b..180cf5a0bd67 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -8,6 +8,7 @@
8#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/hugetlb.h> 10#include <linux/hugetlb.h>
11#include <linux/kernel-page-flags.h>
11#include <asm/uaccess.h> 12#include <asm/uaccess.h>
12#include "internal.h" 13#include "internal.h"
13 14
@@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = {
71 * physical page flags. 72 * physical page flags.
72 */ 73 */
73 74
74/* These macros are used to decouple internal flags from exported ones */
75
76#define KPF_LOCKED 0
77#define KPF_ERROR 1
78#define KPF_REFERENCED 2
79#define KPF_UPTODATE 3
80#define KPF_DIRTY 4
81#define KPF_LRU 5
82#define KPF_ACTIVE 6
83#define KPF_SLAB 7
84#define KPF_WRITEBACK 8
85#define KPF_RECLAIM 9
86#define KPF_BUDDY 10
87
88/* 11-20: new additions in 2.6.31 */
89#define KPF_MMAP 11
90#define KPF_ANON 12
91#define KPF_SWAPCACHE 13
92#define KPF_SWAPBACKED 14
93#define KPF_COMPOUND_HEAD 15
94#define KPF_COMPOUND_TAIL 16
95#define KPF_HUGE 17
96#define KPF_UNEVICTABLE 18
97#define KPF_HWPOISON 19
98#define KPF_NOPAGE 20
99
100#define KPF_KSM 21
101
102/* kernel hacking assistances
103 * WARNING: subject to change, never rely on them!
104 */
105#define KPF_RESERVED 32
106#define KPF_MLOCKED 33
107#define KPF_MAPPEDTODISK 34
108#define KPF_PRIVATE 35
109#define KPF_PRIVATE_2 36
110#define KPF_OWNER_PRIVATE 37
111#define KPF_ARCH 38
112#define KPF_UNCACHED 39
113
114static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) 75static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
115{ 76{
116 return ((kflags >> kbit) & 1) << ubit; 77 return ((kflags >> kbit) & 1) << ubit;
117} 78}
118 79
119static u64 get_uflags(struct page *page) 80u64 stable_page_flags(struct page *page)
120{ 81{
121 u64 k; 82 u64 k;
122 u64 u; 83 u64 u;
@@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
219 else 180 else
220 ppage = NULL; 181 ppage = NULL;
221 182
222 if (put_user(get_uflags(ppage), out)) { 183 if (put_user(stable_page_flags(ppage), out)) {
223 ret = -EFAULT; 184 ret = -EFAULT;
224 break; 185 break;
225 } 186 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2a1bef9203c6..f277c4a111cb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -361,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
361 if (!pte_present(ptent)) 361 if (!pte_present(ptent))
362 continue; 362 continue;
363 363
364 mss->resident += PAGE_SIZE;
365
366 page = vm_normal_page(vma, addr, ptent); 364 page = vm_normal_page(vma, addr, ptent);
367 if (!page) 365 if (!page)
368 continue; 366 continue;
369 367
368 mss->resident += PAGE_SIZE;
370 /* Accumulate the size in pages that have been accessed. */ 369 /* Accumulate the size in pages that have been accessed. */
371 if (pte_young(ptent) || PageReferenced(page)) 370 if (pte_young(ptent) || PageReferenced(page))
372 mss->referenced += PAGE_SIZE; 371 mss->referenced += PAGE_SIZE;
@@ -650,6 +649,50 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
650 return err; 649 return err;
651} 650}
652 651
652static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
653{
654 u64 pme = 0;
655 if (pte_present(pte))
656 pme = PM_PFRAME(pte_pfn(pte) + offset)
657 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
658 return pme;
659}
660
661static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
662 unsigned long end, struct mm_walk *walk)
663{
664 struct vm_area_struct *vma;
665 struct pagemapread *pm = walk->private;
666 struct hstate *hs = NULL;
667 int err = 0;
668
669 vma = find_vma(walk->mm, addr);
670 if (vma)
671 hs = hstate_vma(vma);
672 for (; addr != end; addr += PAGE_SIZE) {
673 u64 pfn = PM_NOT_PRESENT;
674
675 if (vma && (addr >= vma->vm_end)) {
676 vma = find_vma(walk->mm, addr);
677 if (vma)
678 hs = hstate_vma(vma);
679 }
680
681 if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
682 /* calculate pfn of the "raw" page in the hugepage. */
683 int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
684 pfn = huge_pte_to_pagemap_entry(*pte, offset);
685 }
686 err = add_to_pagemap(addr, pfn, pm);
687 if (err)
688 return err;
689 }
690
691 cond_resched();
692
693 return err;
694}
695
653/* 696/*
654 * /proc/pid/pagemap - an array mapping virtual pages to pfns 697 * /proc/pid/pagemap - an array mapping virtual pages to pfns
655 * 698 *
@@ -742,6 +785,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
742 785
743 pagemap_walk.pmd_entry = pagemap_pte_range; 786 pagemap_walk.pmd_entry = pagemap_pte_range;
744 pagemap_walk.pte_hole = pagemap_pte_hole; 787 pagemap_walk.pte_hole = pagemap_pte_hole;
788 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
745 pagemap_walk.mm = mm; 789 pagemap_walk.mm = mm;
746 pagemap_walk.private = &pm; 790 pagemap_walk.private = &pm;
747 791
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8f5c05d3dbd3..5d9fd64ef81a 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -110,9 +110,13 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
110 } 110 }
111 } 111 }
112 112
113 size += (*text = mm->end_code - mm->start_code); 113 *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
114 size += (*data = mm->start_stack - mm->start_data); 114 >> PAGE_SHIFT;
115 *data = (PAGE_ALIGN(mm->start_stack) - (mm->start_data & PAGE_MASK))
116 >> PAGE_SHIFT;
115 up_read(&mm->mmap_sem); 117 up_read(&mm->mmap_sem);
118 size >>= PAGE_SHIFT;
119 size += *text + *data;
116 *resident = size; 120 *resident = size;
117 return size; 121 return size;
118} 122}
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 32f5d131a644..22e0d60e53ef 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -17,13 +17,6 @@
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include "qnx4.h" 18#include "qnx4.h"
19 19
20#if 0
21int qnx4_new_block(struct super_block *sb)
22{
23 return 0;
24}
25#endif /* 0 */
26
27static void count_bits(register const char *bmPart, register int size, 20static void count_bits(register const char *bmPart, register int size,
28 int *const tf) 21 int *const tf)
29{ 22{
@@ -35,22 +28,7 @@ static void count_bits(register const char *bmPart, register int size,
35 } 28 }
36 do { 29 do {
37 b = *bmPart++; 30 b = *bmPart++;
38 if ((b & 1) == 0) 31 tot += 8 - hweight8(b);
39 tot++;
40 if ((b & 2) == 0)
41 tot++;
42 if ((b & 4) == 0)
43 tot++;
44 if ((b & 8) == 0)
45 tot++;
46 if ((b & 16) == 0)
47 tot++;
48 if ((b & 32) == 0)
49 tot++;
50 if ((b & 64) == 0)
51 tot++;
52 if ((b & 128) == 0)
53 tot++;
54 size--; 32 size--;
55 } while (size != 0); 33 } while (size != 0);
56 *tf = tot; 34 *tf = tot;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 449f5a66dd34..ebf3440d28ca 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -64,25 +64,7 @@ static struct buffer_head *qnx4_getblk(struct inode *inode, int nr,
64 result = sb_getblk(inode->i_sb, nr); 64 result = sb_getblk(inode->i_sb, nr);
65 return result; 65 return result;
66 } 66 }
67 if (!create) { 67 return NULL;
68 return NULL;
69 }
70#if 0
71 tmp = qnx4_new_block(inode->i_sb);
72 if (!tmp) {
73 return NULL;
74 }
75 result = sb_getblk(inode->i_sb, tmp);
76 if (tst) {
77 qnx4_free_block(inode->i_sb, tmp);
78 brelse(result);
79 goto repeat;
80 }
81 tst = tmp;
82#endif
83 inode->i_ctime = CURRENT_TIME_SEC;
84 mark_inode_dirty(inode);
85 return result;
86} 68}
87 69
88struct buffer_head *qnx4_bread(struct inode *inode, int block, int create) 70struct buffer_head *qnx4_bread(struct inode *inode, int block, int create)
@@ -113,8 +95,6 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
113 if ( phys ) { 95 if ( phys ) {
114 // logical block is before EOF 96 // logical block is before EOF
115 map_bh(bh, inode->i_sb, phys); 97 map_bh(bh, inode->i_sb, phys);
116 } else if ( create ) {
117 // to be done.
118 } 98 }
119 return 0; 99 return 0;
120} 100}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd6bb9a33c13..3fc62b097bed 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -323,6 +323,30 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
323} 323}
324EXPORT_SYMBOL(dquot_mark_dquot_dirty); 324EXPORT_SYMBOL(dquot_mark_dquot_dirty);
325 325
326/* Dirtify all the dquots - this can block when journalling */
327static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
328{
329 int ret, err, cnt;
330
331 ret = err = 0;
332 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
333 if (dquot[cnt])
334 /* Even in case of error we have to continue */
335 ret = mark_dquot_dirty(dquot[cnt]);
336 if (!err)
337 err = ret;
338 }
339 return err;
340}
341
342static inline void dqput_all(struct dquot **dquot)
343{
344 unsigned int cnt;
345
346 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
347 dqput(dquot[cnt]);
348}
349
326/* This function needs dq_list_lock */ 350/* This function needs dq_list_lock */
327static inline int clear_dquot_dirty(struct dquot *dquot) 351static inline int clear_dquot_dirty(struct dquot *dquot)
328{ 352{
@@ -1268,8 +1292,7 @@ int dquot_initialize(struct inode *inode, int type)
1268out_err: 1292out_err:
1269 up_write(&sb_dqopt(sb)->dqptr_sem); 1293 up_write(&sb_dqopt(sb)->dqptr_sem);
1270 /* Drop unused references */ 1294 /* Drop unused references */
1271 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1295 dqput_all(got);
1272 dqput(got[cnt]);
1273 return ret; 1296 return ret;
1274} 1297}
1275EXPORT_SYMBOL(dquot_initialize); 1298EXPORT_SYMBOL(dquot_initialize);
@@ -1288,9 +1311,7 @@ int dquot_drop(struct inode *inode)
1288 inode->i_dquot[cnt] = NULL; 1311 inode->i_dquot[cnt] = NULL;
1289 } 1312 }
1290 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1313 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1291 1314 dqput_all(put);
1292 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1293 dqput(put[cnt]);
1294 return 0; 1315 return 0;
1295} 1316}
1296EXPORT_SYMBOL(dquot_drop); 1317EXPORT_SYMBOL(dquot_drop);
@@ -1319,6 +1340,70 @@ void vfs_dq_drop(struct inode *inode)
1319EXPORT_SYMBOL(vfs_dq_drop); 1340EXPORT_SYMBOL(vfs_dq_drop);
1320 1341
1321/* 1342/*
1343 * inode_reserved_space is managed internally by quota, and protected by
1344 * i_lock similar to i_blocks+i_bytes.
1345 */
1346static qsize_t *inode_reserved_space(struct inode * inode)
1347{
1348 /* Filesystem must explicitly define it's own method in order to use
1349 * quota reservation interface */
1350 BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
1351 return inode->i_sb->dq_op->get_reserved_space(inode);
1352}
1353
1354static void inode_add_rsv_space(struct inode *inode, qsize_t number)
1355{
1356 spin_lock(&inode->i_lock);
1357 *inode_reserved_space(inode) += number;
1358 spin_unlock(&inode->i_lock);
1359}
1360
1361
1362static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
1363{
1364 spin_lock(&inode->i_lock);
1365 *inode_reserved_space(inode) -= number;
1366 __inode_add_bytes(inode, number);
1367 spin_unlock(&inode->i_lock);
1368}
1369
1370static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1371{
1372 spin_lock(&inode->i_lock);
1373 *inode_reserved_space(inode) -= number;
1374 spin_unlock(&inode->i_lock);
1375}
1376
1377static qsize_t inode_get_rsv_space(struct inode *inode)
1378{
1379 qsize_t ret;
1380
1381 if (!inode->i_sb->dq_op->get_reserved_space)
1382 return 0;
1383 spin_lock(&inode->i_lock);
1384 ret = *inode_reserved_space(inode);
1385 spin_unlock(&inode->i_lock);
1386 return ret;
1387}
1388
1389static void inode_incr_space(struct inode *inode, qsize_t number,
1390 int reserve)
1391{
1392 if (reserve)
1393 inode_add_rsv_space(inode, number);
1394 else
1395 inode_add_bytes(inode, number);
1396}
1397
1398static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
1399{
1400 if (reserve)
1401 inode_sub_rsv_space(inode, number);
1402 else
1403 inode_sub_bytes(inode, number);
1404}
1405
1406/*
1322 * Following four functions update i_blocks+i_bytes fields and 1407 * Following four functions update i_blocks+i_bytes fields and
1323 * quota information (together with appropriate checks) 1408 * quota information (together with appropriate checks)
1324 * NOTE: We absolutely rely on the fact that caller dirties 1409 * NOTE: We absolutely rely on the fact that caller dirties
@@ -1336,6 +1421,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1336 int cnt, ret = QUOTA_OK; 1421 int cnt, ret = QUOTA_OK;
1337 char warntype[MAXQUOTAS]; 1422 char warntype[MAXQUOTAS];
1338 1423
1424 /*
1425 * First test before acquiring mutex - solves deadlocks when we
1426 * re-enter the quota code and are already holding the mutex
1427 */
1428 if (IS_NOQUOTA(inode)) {
1429 inode_incr_space(inode, number, reserve);
1430 goto out;
1431 }
1432
1433 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1434 if (IS_NOQUOTA(inode)) {
1435 inode_incr_space(inode, number, reserve);
1436 goto out_unlock;
1437 }
1438
1339 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1439 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1340 warntype[cnt] = QUOTA_NL_NOWARN; 1440 warntype[cnt] = QUOTA_NL_NOWARN;
1341 1441
@@ -1346,7 +1446,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1346 if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) 1446 if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
1347 == NO_QUOTA) { 1447 == NO_QUOTA) {
1348 ret = NO_QUOTA; 1448 ret = NO_QUOTA;
1349 goto out_unlock; 1449 spin_unlock(&dq_data_lock);
1450 goto out_flush_warn;
1350 } 1451 }
1351 } 1452 }
1352 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1453 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1357,64 +1458,29 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
1357 else 1458 else
1358 dquot_incr_space(inode->i_dquot[cnt], number); 1459 dquot_incr_space(inode->i_dquot[cnt], number);
1359 } 1460 }
1360 if (!reserve) 1461 inode_incr_space(inode, number, reserve);
1361 inode_add_bytes(inode, number);
1362out_unlock:
1363 spin_unlock(&dq_data_lock); 1462 spin_unlock(&dq_data_lock);
1463
1464 if (reserve)
1465 goto out_flush_warn;
1466 mark_all_dquot_dirty(inode->i_dquot);
1467out_flush_warn:
1364 flush_warnings(inode->i_dquot, warntype); 1468 flush_warnings(inode->i_dquot, warntype);
1469out_unlock:
1470 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1471out:
1365 return ret; 1472 return ret;
1366} 1473}
1367 1474
1368int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) 1475int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
1369{ 1476{
1370 int cnt, ret = QUOTA_OK; 1477 return __dquot_alloc_space(inode, number, warn, 0);
1371
1372 /*
1373 * First test before acquiring mutex - solves deadlocks when we
1374 * re-enter the quota code and are already holding the mutex
1375 */
1376 if (IS_NOQUOTA(inode)) {
1377 inode_add_bytes(inode, number);
1378 goto out;
1379 }
1380
1381 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1382 if (IS_NOQUOTA(inode)) {
1383 inode_add_bytes(inode, number);
1384 goto out_unlock;
1385 }
1386
1387 ret = __dquot_alloc_space(inode, number, warn, 0);
1388 if (ret == NO_QUOTA)
1389 goto out_unlock;
1390
1391 /* Dirtify all the dquots - this can block when journalling */
1392 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1393 if (inode->i_dquot[cnt])
1394 mark_dquot_dirty(inode->i_dquot[cnt]);
1395out_unlock:
1396 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1397out:
1398 return ret;
1399} 1478}
1400EXPORT_SYMBOL(dquot_alloc_space); 1479EXPORT_SYMBOL(dquot_alloc_space);
1401 1480
1402int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) 1481int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
1403{ 1482{
1404 int ret = QUOTA_OK; 1483 return __dquot_alloc_space(inode, number, warn, 1);
1405
1406 if (IS_NOQUOTA(inode))
1407 goto out;
1408
1409 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1410 if (IS_NOQUOTA(inode))
1411 goto out_unlock;
1412
1413 ret = __dquot_alloc_space(inode, number, warn, 1);
1414out_unlock:
1415 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1416out:
1417 return ret;
1418} 1484}
1419EXPORT_SYMBOL(dquot_reserve_space); 1485EXPORT_SYMBOL(dquot_reserve_space);
1420 1486
@@ -1455,10 +1521,7 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
1455warn_put_all: 1521warn_put_all:
1456 spin_unlock(&dq_data_lock); 1522 spin_unlock(&dq_data_lock);
1457 if (ret == QUOTA_OK) 1523 if (ret == QUOTA_OK)
1458 /* Dirtify all the dquots - this can block when journalling */ 1524 mark_all_dquot_dirty(inode->i_dquot);
1459 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1460 if (inode->i_dquot[cnt])
1461 mark_dquot_dirty(inode->i_dquot[cnt]);
1462 flush_warnings(inode->i_dquot, warntype); 1525 flush_warnings(inode->i_dquot, warntype);
1463 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1526 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1464 return ret; 1527 return ret;
@@ -1471,14 +1534,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
1471 int ret = QUOTA_OK; 1534 int ret = QUOTA_OK;
1472 1535
1473 if (IS_NOQUOTA(inode)) { 1536 if (IS_NOQUOTA(inode)) {
1474 inode_add_bytes(inode, number); 1537 inode_claim_rsv_space(inode, number);
1475 goto out; 1538 goto out;
1476 } 1539 }
1477 1540
1478 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1541 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1479 if (IS_NOQUOTA(inode)) { 1542 if (IS_NOQUOTA(inode)) {
1480 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1543 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1481 inode_add_bytes(inode, number); 1544 inode_claim_rsv_space(inode, number);
1482 goto out; 1545 goto out;
1483 } 1546 }
1484 1547
@@ -1490,12 +1553,9 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
1490 number); 1553 number);
1491 } 1554 }
1492 /* Update inode bytes */ 1555 /* Update inode bytes */
1493 inode_add_bytes(inode, number); 1556 inode_claim_rsv_space(inode, number);
1494 spin_unlock(&dq_data_lock); 1557 spin_unlock(&dq_data_lock);
1495 /* Dirtify all the dquots - this can block when journalling */ 1558 mark_all_dquot_dirty(inode->i_dquot);
1496 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1497 if (inode->i_dquot[cnt])
1498 mark_dquot_dirty(inode->i_dquot[cnt]);
1499 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1559 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1500out: 1560out:
1501 return ret; 1561 return ret;
@@ -1503,38 +1563,9 @@ out:
1503EXPORT_SYMBOL(dquot_claim_space); 1563EXPORT_SYMBOL(dquot_claim_space);
1504 1564
1505/* 1565/*
1506 * Release reserved quota space
1507 */
1508void dquot_release_reserved_space(struct inode *inode, qsize_t number)
1509{
1510 int cnt;
1511
1512 if (IS_NOQUOTA(inode))
1513 goto out;
1514
1515 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1516 if (IS_NOQUOTA(inode))
1517 goto out_unlock;
1518
1519 spin_lock(&dq_data_lock);
1520 /* Release reserved dquots */
1521 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1522 if (inode->i_dquot[cnt])
1523 dquot_free_reserved_space(inode->i_dquot[cnt], number);
1524 }
1525 spin_unlock(&dq_data_lock);
1526
1527out_unlock:
1528 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1529out:
1530 return;
1531}
1532EXPORT_SYMBOL(dquot_release_reserved_space);
1533
1534/*
1535 * This operation can block, but only after everything is updated 1566 * This operation can block, but only after everything is updated
1536 */ 1567 */
1537int dquot_free_space(struct inode *inode, qsize_t number) 1568int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
1538{ 1569{
1539 unsigned int cnt; 1570 unsigned int cnt;
1540 char warntype[MAXQUOTAS]; 1571 char warntype[MAXQUOTAS];
@@ -1543,7 +1574,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
1543 * re-enter the quota code and are already holding the mutex */ 1574 * re-enter the quota code and are already holding the mutex */
1544 if (IS_NOQUOTA(inode)) { 1575 if (IS_NOQUOTA(inode)) {
1545out_sub: 1576out_sub:
1546 inode_sub_bytes(inode, number); 1577 inode_decr_space(inode, number, reserve);
1547 return QUOTA_OK; 1578 return QUOTA_OK;
1548 } 1579 }
1549 1580
@@ -1558,21 +1589,40 @@ out_sub:
1558 if (!inode->i_dquot[cnt]) 1589 if (!inode->i_dquot[cnt])
1559 continue; 1590 continue;
1560 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); 1591 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
1561 dquot_decr_space(inode->i_dquot[cnt], number); 1592 if (reserve)
1593 dquot_free_reserved_space(inode->i_dquot[cnt], number);
1594 else
1595 dquot_decr_space(inode->i_dquot[cnt], number);
1562 } 1596 }
1563 inode_sub_bytes(inode, number); 1597 inode_decr_space(inode, number, reserve);
1564 spin_unlock(&dq_data_lock); 1598 spin_unlock(&dq_data_lock);
1565 /* Dirtify all the dquots - this can block when journalling */ 1599
1566 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1600 if (reserve)
1567 if (inode->i_dquot[cnt]) 1601 goto out_unlock;
1568 mark_dquot_dirty(inode->i_dquot[cnt]); 1602 mark_all_dquot_dirty(inode->i_dquot);
1603out_unlock:
1569 flush_warnings(inode->i_dquot, warntype); 1604 flush_warnings(inode->i_dquot, warntype);
1570 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1605 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1571 return QUOTA_OK; 1606 return QUOTA_OK;
1572} 1607}
1608
1609int dquot_free_space(struct inode *inode, qsize_t number)
1610{
1611 return __dquot_free_space(inode, number, 0);
1612}
1573EXPORT_SYMBOL(dquot_free_space); 1613EXPORT_SYMBOL(dquot_free_space);
1574 1614
1575/* 1615/*
1616 * Release reserved quota space
1617 */
1618void dquot_release_reserved_space(struct inode *inode, qsize_t number)
1619{
1620 __dquot_free_space(inode, number, 1);
1621
1622}
1623EXPORT_SYMBOL(dquot_release_reserved_space);
1624
1625/*
1576 * This operation can block, but only after everything is updated 1626 * This operation can block, but only after everything is updated
1577 */ 1627 */
1578int dquot_free_inode(const struct inode *inode, qsize_t number) 1628int dquot_free_inode(const struct inode *inode, qsize_t number)
@@ -1599,10 +1649,7 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
1599 dquot_decr_inodes(inode->i_dquot[cnt], number); 1649 dquot_decr_inodes(inode->i_dquot[cnt], number);
1600 } 1650 }
1601 spin_unlock(&dq_data_lock); 1651 spin_unlock(&dq_data_lock);
1602 /* Dirtify all the dquots - this can block when journalling */ 1652 mark_all_dquot_dirty(inode->i_dquot);
1603 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1604 if (inode->i_dquot[cnt])
1605 mark_dquot_dirty(inode->i_dquot[cnt]);
1606 flush_warnings(inode->i_dquot, warntype); 1653 flush_warnings(inode->i_dquot, warntype);
1607 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1654 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1608 return QUOTA_OK; 1655 return QUOTA_OK;
@@ -1610,19 +1657,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
1610EXPORT_SYMBOL(dquot_free_inode); 1657EXPORT_SYMBOL(dquot_free_inode);
1611 1658
1612/* 1659/*
1613 * call back function, get reserved quota space from underlying fs
1614 */
1615qsize_t dquot_get_reserved_space(struct inode *inode)
1616{
1617 qsize_t reserved_space = 0;
1618
1619 if (sb_any_quota_active(inode->i_sb) &&
1620 inode->i_sb->dq_op->get_reserved_space)
1621 reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
1622 return reserved_space;
1623}
1624
1625/*
1626 * Transfer the number of inode and blocks from one diskquota to an other. 1660 * Transfer the number of inode and blocks from one diskquota to an other.
1627 * 1661 *
1628 * This operation can block, but only after everything is updated 1662 * This operation can block, but only after everything is updated
@@ -1665,7 +1699,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1665 } 1699 }
1666 spin_lock(&dq_data_lock); 1700 spin_lock(&dq_data_lock);
1667 cur_space = inode_get_bytes(inode); 1701 cur_space = inode_get_bytes(inode);
1668 rsv_space = dquot_get_reserved_space(inode); 1702 rsv_space = inode_get_rsv_space(inode);
1669 space = cur_space + rsv_space; 1703 space = cur_space + rsv_space;
1670 /* Build the transfer_from list and check the limits */ 1704 /* Build the transfer_from list and check the limits */
1671 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1705 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1709,25 +1743,18 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1709 spin_unlock(&dq_data_lock); 1743 spin_unlock(&dq_data_lock);
1710 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1744 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1711 1745
1712 /* Dirtify all the dquots - this can block when journalling */ 1746 mark_all_dquot_dirty(transfer_from);
1713 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1747 mark_all_dquot_dirty(transfer_to);
1714 if (transfer_from[cnt]) 1748 /* The reference we got is transferred to the inode */
1715 mark_dquot_dirty(transfer_from[cnt]); 1749 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1716 if (transfer_to[cnt]) { 1750 transfer_to[cnt] = NULL;
1717 mark_dquot_dirty(transfer_to[cnt]);
1718 /* The reference we got is transferred to the inode */
1719 transfer_to[cnt] = NULL;
1720 }
1721 }
1722warn_put_all: 1751warn_put_all:
1723 flush_warnings(transfer_to, warntype_to); 1752 flush_warnings(transfer_to, warntype_to);
1724 flush_warnings(transfer_from, warntype_from_inodes); 1753 flush_warnings(transfer_from, warntype_from_inodes);
1725 flush_warnings(transfer_from, warntype_from_space); 1754 flush_warnings(transfer_from, warntype_from_space);
1726put_all: 1755put_all:
1727 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1756 dqput_all(transfer_from);
1728 dqput(transfer_from[cnt]); 1757 dqput_all(transfer_to);
1729 dqput(transfer_to[cnt]);
1730 }
1731 return ret; 1758 return ret;
1732over_quota: 1759over_quota:
1733 spin_unlock(&dq_data_lock); 1760 spin_unlock(&dq_data_lock);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 3dfc23e02135..e3da02f4986f 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -97,8 +97,11 @@ static int v2_read_file_info(struct super_block *sb, int type)
97 unsigned int version; 97 unsigned int version;
98 98
99 if (!v2_read_header(sb, type, &dqhead)) 99 if (!v2_read_header(sb, type, &dqhead))
100 return 0; 100 return -1;
101 version = le32_to_cpu(dqhead.dqh_version); 101 version = le32_to_cpu(dqhead.dqh_version);
102 if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
103 (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
104 return -1;
102 105
103 size = sb->s_op->quota_read(sb, type, (char *)&dinfo, 106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
104 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -120,8 +123,8 @@ static int v2_read_file_info(struct super_block *sb, int type)
120 info->dqi_maxilimit = 0xffffffff; 123 info->dqi_maxilimit = 0xffffffff;
121 } else { 124 } else {
122 /* used space is stored as unsigned 64-bit value */ 125 /* used space is stored as unsigned 64-bit value */
123 info->dqi_maxblimit = 0xffffffffffffffff; /* 2^64-1 */ 126 info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */
124 info->dqi_maxilimit = 0xffffffffffffffff; 127 info->dqi_maxilimit = 0xffffffffffffffffULL;
125 } 128 }
126 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 129 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
127 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 130 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 32fae4040ebf..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -60,7 +60,7 @@ const struct inode_operations ramfs_file_inode_operations = {
60 */ 60 */
61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
62{ 62{
63 unsigned long npages, xpages, loop, limit; 63 unsigned long npages, xpages, loop;
64 struct page *pages; 64 struct page *pages;
65 unsigned order; 65 unsigned order;
66 void *data; 66 void *data;
@@ -123,30 +123,6 @@ add_error:
123 123
124/*****************************************************************************/ 124/*****************************************************************************/
125/* 125/*
126 * check that file shrinkage doesn't leave any VMAs dangling in midair
127 */
128static int ramfs_nommu_check_mappings(struct inode *inode,
129 size_t newsize, size_t size)
130{
131 struct vm_area_struct *vma;
132 struct prio_tree_iter iter;
133
134 /* search for VMAs that fall within the dead zone */
135 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
136 newsize >> PAGE_SHIFT,
137 (size + PAGE_SIZE - 1) >> PAGE_SHIFT
138 ) {
139 /* found one - only interested if it's shared out of the page
140 * cache */
141 if (vma->vm_flags & VM_SHARED)
142 return -ETXTBSY; /* not quite true, but near enough */
143 }
144
145 return 0;
146}
147
148/*****************************************************************************/
149/*
150 * 126 *
151 */ 127 */
152static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) 128static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
164 140
165 /* check that a decrease in size doesn't cut off any shared mappings */ 141 /* check that a decrease in size doesn't cut off any shared mappings */
166 if (newsize < size) { 142 if (newsize < size) {
167 ret = ramfs_nommu_check_mappings(inode, newsize, size); 143 ret = nommu_shrink_inode_mappings(inode, size, newsize);
168 if (ret < 0) 144 if (ret < 0)
169 return ret; 145 return ret;
170 } 146 }
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 6a9e30c041dd..792b3cb2cd18 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,11 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ 7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ 8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
9 hashes.o tail_conversion.o journal.o resize.o \ 9 hashes.o tail_conversion.o journal.o resize.o \
10 item_ops.o ioctl.o procfs.o xattr.o lock.o 10 item_ops.o ioctl.o xattr.o lock.o
11
12ifeq ($(CONFIG_REISERFS_PROC_INFO),y)
13reiserfs-objs += procfs.o
14endif
11 15
12ifeq ($(CONFIG_REISERFS_FS_XATTR),y) 16ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
13reiserfs-objs += xattr_user.o xattr_trusted.o 17reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 685495707181..65c872761177 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1277,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
1277 struct reiserfs_bitmap_info *bitmap; 1277 struct reiserfs_bitmap_info *bitmap;
1278 unsigned int bmap_nr = reiserfs_bmap_count(sb); 1278 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1279 1279
1280 /* Avoid lock recursion in fault case */
1281 reiserfs_write_unlock(sb);
1280 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); 1282 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1283 reiserfs_write_lock(sb);
1281 if (bitmap == NULL) 1284 if (bitmap == NULL)
1282 return -ENOMEM; 1285 return -ENOMEM;
1283 1286
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3a28e7751b3c..9087b10209e6 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode)
31 JOURNAL_PER_BALANCE_CNT * 2 + 31 JOURNAL_PER_BALANCE_CNT * 2 +
32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
33 struct reiserfs_transaction_handle th; 33 struct reiserfs_transaction_handle th;
34 int depth;
34 int err; 35 int err;
35 36
36 truncate_inode_pages(&inode->i_data, 0); 37 truncate_inode_pages(&inode->i_data, 0);
37 38
38 reiserfs_write_lock(inode->i_sb); 39 depth = reiserfs_write_lock_once(inode->i_sb);
39 40
40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 41 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
41 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 42 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
@@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode)
74 out: 75 out:
75 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 76 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
76 inode->i_blocks = 0; 77 inode->i_blocks = 0;
77 reiserfs_write_unlock(inode->i_sb); 78 reiserfs_write_unlock_once(inode->i_sb, depth);
78} 79}
79 80
80static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 81static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -2538,6 +2539,12 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2538 return reiserfs_write_full_page(page, wbc); 2539 return reiserfs_write_full_page(page, wbc);
2539} 2540}
2540 2541
2542static void reiserfs_truncate_failed_write(struct inode *inode)
2543{
2544 truncate_inode_pages(inode->i_mapping, inode->i_size);
2545 reiserfs_truncate_file(inode, 0);
2546}
2547
2541static int reiserfs_write_begin(struct file *file, 2548static int reiserfs_write_begin(struct file *file,
2542 struct address_space *mapping, 2549 struct address_space *mapping,
2543 loff_t pos, unsigned len, unsigned flags, 2550 loff_t pos, unsigned len, unsigned flags,
@@ -2604,6 +2611,8 @@ static int reiserfs_write_begin(struct file *file,
2604 if (ret) { 2611 if (ret) {
2605 unlock_page(page); 2612 unlock_page(page);
2606 page_cache_release(page); 2613 page_cache_release(page);
2614 /* Truncate allocated blocks */
2615 reiserfs_truncate_failed_write(inode);
2607 } 2616 }
2608 return ret; 2617 return ret;
2609} 2618}
@@ -2701,9 +2710,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2701 ** transaction tracking stuff when the size changes. So, we have 2710 ** transaction tracking stuff when the size changes. So, we have
2702 ** to do the i_size updates here. 2711 ** to do the i_size updates here.
2703 */ 2712 */
2704 pos += copied; 2713 if (pos + copied > inode->i_size) {
2705
2706 if (pos > inode->i_size) {
2707 struct reiserfs_transaction_handle myth; 2714 struct reiserfs_transaction_handle myth;
2708 lock_depth = reiserfs_write_lock_once(inode->i_sb); 2715 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2709 locked = true; 2716 locked = true;
@@ -2721,7 +2728,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2721 goto journal_error; 2728 goto journal_error;
2722 2729
2723 reiserfs_update_inode_transaction(inode); 2730 reiserfs_update_inode_transaction(inode);
2724 inode->i_size = pos; 2731 inode->i_size = pos + copied;
2725 /* 2732 /*
2726 * this will just nest into our transaction. It's important 2733 * this will just nest into our transaction. It's important
2727 * to use mark_inode_dirty so the inode gets pushed around on the 2734 * to use mark_inode_dirty so the inode gets pushed around on the
@@ -2751,6 +2758,10 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2751 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 2758 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2752 unlock_page(page); 2759 unlock_page(page);
2753 page_cache_release(page); 2760 page_cache_release(page);
2761
2762 if (pos + len > inode->i_size)
2763 reiserfs_truncate_failed_write(inode);
2764
2754 return ret == 0 ? copied : ret; 2765 return ret == 0 ? copied : ret;
2755 2766
2756 journal_error: 2767 journal_error:
@@ -3051,13 +3062,14 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3051int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 3062int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3052{ 3063{
3053 struct inode *inode = dentry->d_inode; 3064 struct inode *inode = dentry->d_inode;
3054 int error;
3055 unsigned int ia_valid; 3065 unsigned int ia_valid;
3066 int depth;
3067 int error;
3056 3068
3057 /* must be turned off for recursive notify_change calls */ 3069 /* must be turned off for recursive notify_change calls */
3058 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3070 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3059 3071
3060 reiserfs_write_lock(inode->i_sb); 3072 depth = reiserfs_write_lock_once(inode->i_sb);
3061 if (attr->ia_valid & ATTR_SIZE) { 3073 if (attr->ia_valid & ATTR_SIZE) {
3062 /* version 2 items will be caught by the s_maxbytes check 3074 /* version 2 items will be caught by the s_maxbytes check
3063 ** done for us in vmtruncate 3075 ** done for us in vmtruncate
@@ -3138,8 +3150,17 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3138 journal_end(&th, inode->i_sb, jbegin_count); 3150 journal_end(&th, inode->i_sb, jbegin_count);
3139 } 3151 }
3140 } 3152 }
3141 if (!error) 3153 if (!error) {
3154 /*
3155 * Relax the lock here, as it might truncate the
3156 * inode pages and wait for inode pages locks.
3157 * To release such page lock, the owner needs the
3158 * reiserfs lock
3159 */
3160 reiserfs_write_unlock_once(inode->i_sb, depth);
3142 error = inode_setattr(inode, attr); 3161 error = inode_setattr(inode, attr);
3162 depth = reiserfs_write_lock_once(inode->i_sb);
3163 }
3143 } 3164 }
3144 3165
3145 if (!error && reiserfs_posixacl(inode->i_sb)) { 3166 if (!error && reiserfs_posixacl(inode->i_sb)) {
@@ -3148,7 +3169,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3148 } 3169 }
3149 3170
3150 out: 3171 out:
3151 reiserfs_write_unlock(inode->i_sb); 3172 reiserfs_write_unlock_once(inode->i_sb, depth);
3173
3152 return error; 3174 return error;
3153} 3175}
3154 3176
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index ace77451ceb1..f53505de0712 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -104,9 +104,10 @@ setflags_out:
104 err = put_user(inode->i_generation, (int __user *)arg); 104 err = put_user(inode->i_generation, (int __user *)arg);
105 break; 105 break;
106 case REISERFS_IOC_SETVERSION: 106 case REISERFS_IOC_SETVERSION:
107 if (!is_owner_or_cap(inode)) 107 if (!is_owner_or_cap(inode)) {
108 err = -EPERM; 108 err = -EPERM;
109 break; 109 break;
110 }
110 err = mnt_want_write(filp->f_path.mnt); 111 err = mnt_want_write(filp->f_path.mnt);
111 if (err) 112 if (err)
112 break; 113 break;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2f8a7e7b8dab..83ac4d3b3cb0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2009,10 +2009,11 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
2009 destroy_workqueue(commit_wq); 2009 destroy_workqueue(commit_wq);
2010 commit_wq = NULL; 2010 commit_wq = NULL;
2011 } 2011 }
2012 reiserfs_write_lock(sb);
2013 2012
2014 free_journal_ram(sb); 2013 free_journal_ram(sb);
2015 2014
2015 reiserfs_write_lock(sb);
2016
2016 return 0; 2017 return 0;
2017} 2018}
2018 2019
@@ -2758,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2758 struct reiserfs_journal *journal; 2759 struct reiserfs_journal *journal;
2759 struct reiserfs_journal_list *jl; 2760 struct reiserfs_journal_list *jl;
2760 char b[BDEVNAME_SIZE]; 2761 char b[BDEVNAME_SIZE];
2762 int ret;
2761 2763
2764 /*
2765 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
2766 * dependency inversion warnings.
2767 */
2768 reiserfs_write_unlock(sb);
2762 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); 2769 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
2763 if (!journal) { 2770 if (!journal) {
2764 reiserfs_warning(sb, "journal-1256", 2771 reiserfs_warning(sb, "journal-1256",
2765 "unable to get memory for journal structure"); 2772 "unable to get memory for journal structure");
2773 reiserfs_write_lock(sb);
2766 return 1; 2774 return 1;
2767 } 2775 }
2768 memset(journal, 0, sizeof(struct reiserfs_journal)); 2776 memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2771,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2771 INIT_LIST_HEAD(&journal->j_working_list); 2779 INIT_LIST_HEAD(&journal->j_working_list);
2772 INIT_LIST_HEAD(&journal->j_journal_list); 2780 INIT_LIST_HEAD(&journal->j_journal_list);
2773 journal->j_persistent_trans = 0; 2781 journal->j_persistent_trans = 0;
2774 if (reiserfs_allocate_list_bitmaps(sb, 2782 ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
2775 journal->j_list_bitmap, 2783 reiserfs_bmap_count(sb));
2776 reiserfs_bmap_count(sb))) 2784 reiserfs_write_lock(sb);
2785 if (ret)
2777 goto free_and_return; 2786 goto free_and_return;
2787
2778 allocate_bitmap_nodes(sb); 2788 allocate_bitmap_nodes(sb);
2779 2789
2780 /* reserved for journal area support */ 2790 /* reserved for journal area support */
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
86 reiserfs_panic(sb, "%s called without kernel lock held %d", 86 reiserfs_panic(sb, "%s called without kernel lock held %d",
87 caller); 87 caller);
88} 88}
89
90#ifdef CONFIG_REISERFS_CHECK
91void reiserfs_lock_check_recursive(struct super_block *sb)
92{
93 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
94
95 WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
96}
97#endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e296ff72a6cc..9d4dcf0b07cb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -921,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
921 struct reiserfs_transaction_handle th; 921 struct reiserfs_transaction_handle th;
922 int jbegin_count; 922 int jbegin_count;
923 unsigned long savelink; 923 unsigned long savelink;
924 int depth;
924 925
925 inode = dentry->d_inode; 926 inode = dentry->d_inode;
926 927
@@ -932,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
932 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 933 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
933 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 934 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
934 935
935 reiserfs_write_lock(dir->i_sb); 936 depth = reiserfs_write_lock_once(dir->i_sb);
936 retval = journal_begin(&th, dir->i_sb, jbegin_count); 937 retval = journal_begin(&th, dir->i_sb, jbegin_count);
937 if (retval) 938 if (retval)
938 goto out_unlink; 939 goto out_unlink;
@@ -993,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
993 994
994 retval = journal_end(&th, dir->i_sb, jbegin_count); 995 retval = journal_end(&th, dir->i_sb, jbegin_count);
995 reiserfs_check_path(&path); 996 reiserfs_check_path(&path);
996 reiserfs_write_unlock(dir->i_sb); 997 reiserfs_write_unlock_once(dir->i_sb, depth);
997 return retval; 998 return retval;
998 999
999 end_unlink: 1000 end_unlink:
@@ -1003,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
1003 if (err) 1004 if (err)
1004 retval = err; 1005 retval = err;
1005 out_unlink: 1006 out_unlink:
1006 reiserfs_write_unlock(dir->i_sb); 1007 reiserfs_write_unlock_once(dir->i_sb, depth);
1007 return retval; 1008 return retval;
1008} 1009}
1009 1010
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 9229e5514a4e..7a9981196c1c 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -17,8 +17,6 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19 19
20#ifdef CONFIG_REISERFS_PROC_INFO
21
22/* 20/*
23 * LOCKING: 21 * LOCKING:
24 * 22 *
@@ -48,14 +46,6 @@ static int show_version(struct seq_file *m, struct super_block *sb)
48 return 0; 46 return 0;
49} 47}
50 48
51int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
52 int count, int *eof, void *data)
53{
54 *start = buffer;
55 *eof = 1;
56 return 0;
57}
58
59#define SF( x ) ( r -> x ) 49#define SF( x ) ( r -> x )
60#define SFP( x ) SF( s_proc_info_data.x ) 50#define SFP( x ) SF( s_proc_info_data.x )
61#define SFPL( x ) SFP( x[ level ] ) 51#define SFPL( x ) SFP( x[ level ] )
@@ -538,19 +528,6 @@ int reiserfs_proc_info_done(struct super_block *sb)
538 return 0; 528 return 0;
539} 529}
540 530
541struct proc_dir_entry *reiserfs_proc_register_global(char *name,
542 read_proc_t * func)
543{
544 return (proc_info_root) ? create_proc_read_entry(name, 0,
545 proc_info_root,
546 func, NULL) : NULL;
547}
548
549void reiserfs_proc_unregister_global(const char *name)
550{
551 remove_proc_entry(name, proc_info_root);
552}
553
554int reiserfs_proc_info_global_init(void) 531int reiserfs_proc_info_global_init(void)
555{ 532{
556 if (proc_info_root == NULL) { 533 if (proc_info_root == NULL) {
@@ -572,48 +549,6 @@ int reiserfs_proc_info_global_done(void)
572 } 549 }
573 return 0; 550 return 0;
574} 551}
575
576/* REISERFS_PROC_INFO */
577#else
578
579int reiserfs_proc_info_init(struct super_block *sb)
580{
581 return 0;
582}
583int reiserfs_proc_info_done(struct super_block *sb)
584{
585 return 0;
586}
587
588struct proc_dir_entry *reiserfs_proc_register_global(char *name,
589 read_proc_t * func)
590{
591 return NULL;
592}
593
594void reiserfs_proc_unregister_global(const char *name)
595{;
596}
597
598int reiserfs_proc_info_global_init(void)
599{
600 return 0;
601}
602int reiserfs_proc_info_global_done(void)
603{
604 return 0;
605}
606
607int reiserfs_global_version_in_proc(char *buffer, char **start,
608 off_t offset,
609 int count, int *eof, void *data)
610{
611 return 0;
612}
613
614/* REISERFS_PROC_INFO */
615#endif
616
617/* 552/*
618 * Revision 1.1.8.2 2001/07/15 17:08:42 god 553 * Revision 1.1.8.2 2001/07/15 17:08:42 god
619 * . use get_super() in procfs.c 554 * . use get_super() in procfs.c
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 339b0baf2af6..b4a7dd03bdb9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2222,8 +2222,6 @@ static int __init init_reiserfs_fs(void)
2222 } 2222 }
2223 2223
2224 reiserfs_proc_info_global_init(); 2224 reiserfs_proc_info_global_init();
2225 reiserfs_proc_register_global("version",
2226 reiserfs_global_version_in_proc);
2227 2225
2228 ret = register_filesystem(&reiserfs_fs_type); 2226 ret = register_filesystem(&reiserfs_fs_type);
2229 2227
@@ -2231,7 +2229,6 @@ static int __init init_reiserfs_fs(void)
2231 return 0; 2229 return 0;
2232 } 2230 }
2233 2231
2234 reiserfs_proc_unregister_global("version");
2235 reiserfs_proc_info_global_done(); 2232 reiserfs_proc_info_global_done();
2236 destroy_inodecache(); 2233 destroy_inodecache();
2237 2234
@@ -2240,7 +2237,6 @@ static int __init init_reiserfs_fs(void)
2240 2237
2241static void __exit exit_reiserfs_fs(void) 2238static void __exit exit_reiserfs_fs(void)
2242{ 2239{
2243 reiserfs_proc_unregister_global("version");
2244 reiserfs_proc_info_global_done(); 2240 reiserfs_proc_info_global_done();
2245 unregister_filesystem(&reiserfs_fs_type); 2241 unregister_filesystem(&reiserfs_fs_type);
2246 destroy_inodecache(); 2242 destroy_inodecache();
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f5..81f09fab8ae4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -48,6 +48,7 @@
48#include <net/checksum.h> 48#include <net/checksum.h>
49#include <linux/stat.h> 49#include <linux/stat.h>
50#include <linux/quotaops.h> 50#include <linux/quotaops.h>
51#include <linux/security.h>
51 52
52#define PRIVROOT_NAME ".reiserfs_priv" 53#define PRIVROOT_NAME ".reiserfs_priv"
53#define XAROOT_NAME "xattrs" 54#define XAROOT_NAME "xattrs"
@@ -82,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
82 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 83 BUG_ON(!mutex_is_locked(&dir->i_mutex));
83 vfs_dq_init(dir); 84 vfs_dq_init(dir);
84 85
85 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 86 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
87 I_MUTEX_CHILD, dir->i_sb);
86 error = dir->i_op->unlink(dir, dentry); 88 error = dir->i_op->unlink(dir, dentry);
87 mutex_unlock(&dentry->d_inode->i_mutex); 89 mutex_unlock(&dentry->d_inode->i_mutex);
88 90
@@ -97,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
97 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 99 BUG_ON(!mutex_is_locked(&dir->i_mutex));
98 vfs_dq_init(dir); 100 vfs_dq_init(dir);
99 101
100 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 102 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
103 I_MUTEX_CHILD, dir->i_sb);
101 dentry_unhash(dentry); 104 dentry_unhash(dentry);
102 error = dir->i_op->rmdir(dir, dentry); 105 error = dir->i_op->rmdir(dir, dentry);
103 if (!error) 106 if (!error)
@@ -234,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode,
234 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) 237 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
235 return 0; 238 return 0;
236 239
240 reiserfs_write_unlock(inode->i_sb);
237 dir = open_xa_dir(inode, XATTR_REPLACE); 241 dir = open_xa_dir(inode, XATTR_REPLACE);
238 if (IS_ERR(dir)) { 242 if (IS_ERR(dir)) {
239 err = PTR_ERR(dir); 243 err = PTR_ERR(dir);
244 reiserfs_write_lock(inode->i_sb);
240 goto out; 245 goto out;
241 } else if (!dir->d_inode) { 246 } else if (!dir->d_inode) {
242 err = 0; 247 err = 0;
248 reiserfs_write_lock(inode->i_sb);
243 goto out_dir; 249 goto out_dir;
244 } 250 }
245 251
246 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); 252 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
253
254 reiserfs_write_lock(inode->i_sb);
255
247 buf.xadir = dir; 256 buf.xadir = dir;
248 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); 257 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
249 while ((err == 0 || err == -ENOSPC) && buf.count) { 258 while ((err == 0 || err == -ENOSPC) && buf.count) {
@@ -282,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode,
282 err = journal_begin(&th, inode->i_sb, blocks); 291 err = journal_begin(&th, inode->i_sb, blocks);
283 if (!err) { 292 if (!err) {
284 int jerror; 293 int jerror;
285 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, 294 reiserfs_mutex_lock_nested_safe(
286 I_MUTEX_XATTR); 295 &dir->d_parent->d_inode->i_mutex,
296 I_MUTEX_XATTR, inode->i_sb);
287 err = action(dir, data); 297 err = action(dir, data);
288 jerror = journal_end(&th, inode->i_sb, blocks); 298 jerror = journal_end(&th, inode->i_sb, blocks);
289 mutex_unlock(&dir->d_parent->d_inode->i_mutex); 299 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
@@ -442,7 +452,9 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
442 } 452 }
443 453
444 if (dentry->d_inode) { 454 if (dentry->d_inode) {
455 reiserfs_write_lock(inode->i_sb);
445 err = xattr_unlink(xadir->d_inode, dentry); 456 err = xattr_unlink(xadir->d_inode, dentry);
457 reiserfs_write_unlock(inode->i_sb);
446 update_ctime(inode); 458 update_ctime(inode);
447 } 459 }
448 460
@@ -476,15 +488,24 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
476 if (get_inode_sd_version(inode) == STAT_DATA_V1) 488 if (get_inode_sd_version(inode) == STAT_DATA_V1)
477 return -EOPNOTSUPP; 489 return -EOPNOTSUPP;
478 490
479 if (!buffer) 491 reiserfs_write_unlock(inode->i_sb);
480 return lookup_and_delete_xattr(inode, name); 492
493 if (!buffer) {
494 err = lookup_and_delete_xattr(inode, name);
495 reiserfs_write_lock(inode->i_sb);
496 return err;
497 }
481 498
482 dentry = xattr_lookup(inode, name, flags); 499 dentry = xattr_lookup(inode, name, flags);
483 if (IS_ERR(dentry)) 500 if (IS_ERR(dentry)) {
501 reiserfs_write_lock(inode->i_sb);
484 return PTR_ERR(dentry); 502 return PTR_ERR(dentry);
503 }
485 504
486 down_write(&REISERFS_I(inode)->i_xattr_sem); 505 down_write(&REISERFS_I(inode)->i_xattr_sem);
487 506
507 reiserfs_write_lock(inode->i_sb);
508
488 xahash = xattr_hash(buffer, buffer_size); 509 xahash = xattr_hash(buffer, buffer_size);
489 while (buffer_pos < buffer_size || buffer_pos == 0) { 510 while (buffer_pos < buffer_size || buffer_pos == 0) {
490 size_t chunk; 511 size_t chunk;
@@ -539,8 +560,12 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
539 .ia_size = buffer_size, 560 .ia_size = buffer_size,
540 .ia_valid = ATTR_SIZE | ATTR_CTIME, 561 .ia_valid = ATTR_SIZE | ATTR_CTIME,
541 }; 562 };
563
564 reiserfs_write_unlock(inode->i_sb);
542 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 565 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
543 down_write(&dentry->d_inode->i_alloc_sem); 566 down_write(&dentry->d_inode->i_alloc_sem);
567 reiserfs_write_lock(inode->i_sb);
568
544 err = reiserfs_setattr(dentry, &newattrs); 569 err = reiserfs_setattr(dentry, &newattrs);
545 up_write(&dentry->d_inode->i_alloc_sem); 570 up_write(&dentry->d_inode->i_alloc_sem);
546 mutex_unlock(&dentry->d_inode->i_mutex); 571 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -726,15 +751,14 @@ ssize_t
726reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 751reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
727 size_t size) 752 size_t size)
728{ 753{
729 struct inode *inode = dentry->d_inode;
730 struct xattr_handler *handler; 754 struct xattr_handler *handler;
731 755
732 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 756 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
733 757
734 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 758 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
735 return -EOPNOTSUPP; 759 return -EOPNOTSUPP;
736 760
737 return handler->get(inode, name, buffer, size); 761 return handler->get(dentry, name, buffer, size, handler->flags);
738} 762}
739 763
740/* 764/*
@@ -746,15 +770,14 @@ int
746reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 770reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
747 size_t size, int flags) 771 size_t size, int flags)
748{ 772{
749 struct inode *inode = dentry->d_inode;
750 struct xattr_handler *handler; 773 struct xattr_handler *handler;
751 774
752 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 775 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
753 776
754 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 777 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
755 return -EOPNOTSUPP; 778 return -EOPNOTSUPP;
756 779
757 return handler->set(inode, name, value, size, flags); 780 return handler->set(dentry, name, value, size, flags, handler->flags);
758} 781}
759 782
760/* 783/*
@@ -764,21 +787,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
764 */ 787 */
765int reiserfs_removexattr(struct dentry *dentry, const char *name) 788int reiserfs_removexattr(struct dentry *dentry, const char *name)
766{ 789{
767 struct inode *inode = dentry->d_inode;
768 struct xattr_handler *handler; 790 struct xattr_handler *handler;
769 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name); 791 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
770 792
771 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1) 793 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
772 return -EOPNOTSUPP; 794 return -EOPNOTSUPP;
773 795
774 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 796 return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
775} 797}
776 798
777struct listxattr_buf { 799struct listxattr_buf {
778 size_t size; 800 size_t size;
779 size_t pos; 801 size_t pos;
780 char *buf; 802 char *buf;
781 struct inode *inode; 803 struct dentry *dentry;
782}; 804};
783 805
784static int listxattr_filler(void *buf, const char *name, int namelen, 806static int listxattr_filler(void *buf, const char *name, int namelen,
@@ -789,17 +811,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
789 if (name[0] != '.' || 811 if (name[0] != '.' ||
790 (namelen != 1 && (name[1] != '.' || namelen != 2))) { 812 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
791 struct xattr_handler *handler; 813 struct xattr_handler *handler;
792 handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr, 814 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
793 name); 815 name);
794 if (!handler) /* Unsupported xattr name */ 816 if (!handler) /* Unsupported xattr name */
795 return 0; 817 return 0;
796 if (b->buf) { 818 if (b->buf) {
797 size = handler->list(b->inode, b->buf + b->pos, 819 size = handler->list(b->dentry, b->buf + b->pos,
798 b->size, name, namelen); 820 b->size, name, namelen,
821 handler->flags);
799 if (size > b->size) 822 if (size > b->size)
800 return -ERANGE; 823 return -ERANGE;
801 } else { 824 } else {
802 size = handler->list(b->inode, NULL, 0, name, namelen); 825 size = handler->list(b->dentry, NULL, 0, name,
826 namelen, handler->flags);
803 } 827 }
804 828
805 b->pos += size; 829 b->pos += size;
@@ -820,7 +844,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
820 int err = 0; 844 int err = 0;
821 loff_t pos = 0; 845 loff_t pos = 0;
822 struct listxattr_buf buf = { 846 struct listxattr_buf buf = {
823 .inode = dentry->d_inode, 847 .dentry = dentry,
824 .buf = buffer, 848 .buf = buffer,
825 .size = buffer ? size : 0, 849 .size = buffer ? size : 0,
826 }; 850 };
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 35d6e672a279..dd20a7883f0f 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
15 struct posix_acl *acl); 15 struct posix_acl *acl);
16 16
17static int 17static int
18xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) 18posix_acl_set(struct dentry *dentry, const char *name, const void *value,
19 size_t size, int flags, int type)
19{ 20{
21 struct inode *inode = dentry->d_inode;
20 struct posix_acl *acl; 22 struct posix_acl *acl;
21 int error, error2; 23 int error, error2;
22 struct reiserfs_transaction_handle th; 24 struct reiserfs_transaction_handle th;
@@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
60} 62}
61 63
62static int 64static int
63xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 65posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
66 size_t size, int type)
64{ 67{
65 struct posix_acl *acl; 68 struct posix_acl *acl;
66 int error; 69 int error;
67 70
68 if (!reiserfs_posixacl(inode->i_sb)) 71 if (!reiserfs_posixacl(dentry->d_sb))
69 return -EOPNOTSUPP; 72 return -EOPNOTSUPP;
70 73
71 acl = reiserfs_get_acl(inode, type); 74 acl = reiserfs_get_acl(dentry->d_inode, type);
72 if (IS_ERR(acl)) 75 if (IS_ERR(acl))
73 return PTR_ERR(acl); 76 return PTR_ERR(acl);
74 if (acl == NULL) 77 if (acl == NULL)
@@ -452,7 +455,9 @@ int reiserfs_acl_chmod(struct inode *inode)
452 return 0; 455 return 0;
453 } 456 }
454 457
458 reiserfs_write_unlock(inode->i_sb);
455 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 459 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
460 reiserfs_write_lock(inode->i_sb);
456 if (!acl) 461 if (!acl)
457 return 0; 462 return 0;
458 if (IS_ERR(acl)) 463 if (IS_ERR(acl))
@@ -482,30 +487,12 @@ int reiserfs_acl_chmod(struct inode *inode)
482 return error; 487 return error;
483} 488}
484 489
485static int 490static size_t posix_acl_access_list(struct dentry *dentry, char *list,
486posix_acl_access_get(struct inode *inode, const char *name,
487 void *buffer, size_t size)
488{
489 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
490 return -EINVAL;
491 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
492}
493
494static int
495posix_acl_access_set(struct inode *inode, const char *name,
496 const void *value, size_t size, int flags)
497{
498 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
499 return -EINVAL;
500 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
501}
502
503static size_t posix_acl_access_list(struct inode *inode, char *list,
504 size_t list_size, const char *name, 491 size_t list_size, const char *name,
505 size_t name_len) 492 size_t name_len, int type)
506{ 493{
507 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); 494 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
508 if (!reiserfs_posixacl(inode->i_sb)) 495 if (!reiserfs_posixacl(dentry->d_sb))
509 return 0; 496 return 0;
510 if (list && size <= list_size) 497 if (list && size <= list_size)
511 memcpy(list, POSIX_ACL_XATTR_ACCESS, size); 498 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -514,35 +501,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list,
514 501
515struct xattr_handler reiserfs_posix_acl_access_handler = { 502struct xattr_handler reiserfs_posix_acl_access_handler = {
516 .prefix = POSIX_ACL_XATTR_ACCESS, 503 .prefix = POSIX_ACL_XATTR_ACCESS,
517 .get = posix_acl_access_get, 504 .flags = ACL_TYPE_ACCESS,
518 .set = posix_acl_access_set, 505 .get = posix_acl_get,
506 .set = posix_acl_set,
519 .list = posix_acl_access_list, 507 .list = posix_acl_access_list,
520}; 508};
521 509
522static int 510static size_t posix_acl_default_list(struct dentry *dentry, char *list,
523posix_acl_default_get(struct inode *inode, const char *name,
524 void *buffer, size_t size)
525{
526 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
527 return -EINVAL;
528 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
529}
530
531static int
532posix_acl_default_set(struct inode *inode, const char *name,
533 const void *value, size_t size, int flags)
534{
535 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
536 return -EINVAL;
537 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
538}
539
540static size_t posix_acl_default_list(struct inode *inode, char *list,
541 size_t list_size, const char *name, 511 size_t list_size, const char *name,
542 size_t name_len) 512 size_t name_len, int type)
543{ 513{
544 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); 514 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
545 if (!reiserfs_posixacl(inode->i_sb)) 515 if (!reiserfs_posixacl(dentry->d_sb))
546 return 0; 516 return 0;
547 if (list && size <= list_size) 517 if (list && size <= list_size)
548 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); 518 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -551,7 +521,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list,
551 521
552struct xattr_handler reiserfs_posix_acl_default_handler = { 522struct xattr_handler reiserfs_posix_acl_default_handler = {
553 .prefix = POSIX_ACL_XATTR_DEFAULT, 523 .prefix = POSIX_ACL_XATTR_DEFAULT,
554 .get = posix_acl_default_get, 524 .flags = ACL_TYPE_DEFAULT,
555 .set = posix_acl_default_set, 525 .get = posix_acl_get,
526 .set = posix_acl_set,
556 .list = posix_acl_default_list, 527 .list = posix_acl_default_list,
557}; 528};
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index a92c8792c0f6..d8b5bfcbdd30 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11security_get(struct inode *inode, const char *name, void *buffer, size_t size) 11security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (IS_PRIVATE(inode)) 17 if (IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23security_set(struct inode *inode, const char *name, const void *buffer, 24security_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (IS_PRIVATE(inode)) 30 if (IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t security_list(struct inode *inode, char *list, size_t list_len, 36static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
36 const char *name, size_t namelen) 37 const char *name, size_t namelen, int handler_flags)
37{ 38{
38 const size_t len = namelen + 1; 39 const size_t len = namelen + 1;
39 40
40 if (IS_PRIVATE(inode)) 41 if (IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_len) { 44 if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a865042f75e2..5b08aaca3daf 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -8,36 +8,37 @@
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10static int 10static int
11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
12 int handler_flags)
12{ 13{
13 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 16
16 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 17 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
17 return -EPERM; 18 return -EPERM;
18 19
19 return reiserfs_xattr_get(inode, name, buffer, size); 20 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
20} 21}
21 22
22static int 23static int
23trusted_set(struct inode *inode, const char *name, const void *buffer, 24trusted_set(struct dentry *dentry, const char *name, const void *buffer,
24 size_t size, int flags) 25 size_t size, int flags, int handler_flags)
25{ 26{
26 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
27 return -EINVAL; 28 return -EINVAL;
28 29
29 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 30 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
30 return -EPERM; 31 return -EPERM;
31 32
32 return reiserfs_xattr_set(inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
33} 34}
34 35
35static size_t trusted_list(struct inode *inode, char *list, size_t list_size, 36static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
36 const char *name, size_t name_len) 37 const char *name, size_t name_len, int handler_flags)
37{ 38{
38 const size_t len = name_len + 1; 39 const size_t len = name_len + 1;
39 40
40 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) 41 if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
41 return 0; 42 return 0;
42 43
43 if (list && len <= list_size) { 44 if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index e3238dc4f3db..75d59c49b911 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -7,34 +7,35 @@
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9static int 9static int
10user_get(struct inode *inode, const char *name, void *buffer, size_t size) 10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
11 int handler_flags)
11{ 12{
12 13
13 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
14 return -EINVAL; 15 return -EINVAL;
15 if (!reiserfs_xattrs_user(inode->i_sb)) 16 if (!reiserfs_xattrs_user(dentry->d_sb))
16 return -EOPNOTSUPP; 17 return -EOPNOTSUPP;
17 return reiserfs_xattr_get(inode, name, buffer, size); 18 return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
18} 19}
19 20
20static int 21static int
21user_set(struct inode *inode, const char *name, const void *buffer, 22user_set(struct dentry *dentry, const char *name, const void *buffer,
22 size_t size, int flags) 23 size_t size, int flags, int handler_flags)
23{ 24{
24 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 25 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
25 return -EINVAL; 26 return -EINVAL;
26 27
27 if (!reiserfs_xattrs_user(inode->i_sb)) 28 if (!reiserfs_xattrs_user(dentry->d_sb))
28 return -EOPNOTSUPP; 29 return -EOPNOTSUPP;
29 return reiserfs_xattr_set(inode, name, buffer, size, flags); 30 return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
30} 31}
31 32
32static size_t user_list(struct inode *inode, char *list, size_t list_size, 33static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
33 const char *name, size_t name_len) 34 const char *name, size_t name_len, int handler_flags)
34{ 35{
35 const size_t len = name_len + 1; 36 const size_t len = name_len + 1;
36 37
37 if (!reiserfs_xattrs_user(inode->i_sb)) 38 if (!reiserfs_xattrs_user(dentry->d_sb))
38 return 0; 39 return 0;
39 if (list && len <= list_size) { 40 if (list && len <= list_size) {
40 memcpy(list, name, name_len); 41 memcpy(list, name, name_len);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index b07565c94386..1dabe4ee02fe 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -236,7 +236,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
236 * anon_inode_getfd() will install the fd. 236 * anon_inode_getfd() will install the fd.
237 */ 237 */
238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, 238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
239 flags & (O_CLOEXEC | O_NONBLOCK)); 239 O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
240 if (ufd < 0) 240 if (ufd < 0)
241 kfree(ctx); 241 kfree(ctx);
242 } else { 242 } else {
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4a..4a6f7f440658 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,18 +7,63 @@
7 * This function cannot be inlined since i_size_{read,write} is rather 7 * This function cannot be inlined since i_size_{read,write} is rather
8 * heavy-weight on 32-bit systems 8 * heavy-weight on 32-bit systems
9 */ 9 */
10void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) 10void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
11{ 11{
12 i_size_write(dst, i_size_read((struct inode *)src)); 12 loff_t i_size;
13 dst->i_blocks = src->i_blocks; 13 blkcnt_t i_blocks;
14
15 /*
16 * i_size_read() includes its own seqlocking and protection from
17 * preemption (see include/linux/fs.h): we need nothing extra for
18 * that here, and prefer to avoid nesting locks than attempt to keep
19 * i_size and i_blocks in sync together.
20 */
21 i_size = i_size_read(src);
22
23 /*
24 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
25 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
26 * though stat's generic_fillattr() doesn't bother, and we won't be
27 * applying quotas (where i_blocks does become important) at the
28 * upper level.
29 *
30 * We don't actually know what locking is used at the lower level;
31 * but if it's a filesystem that supports quotas, it will be using
32 * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
33 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
34 * holes; but its i_blocks cannot carry into the upper long without
35 * almost 2TB swap - let's ignore that case.
36 */
37 if (sizeof(i_blocks) > sizeof(long))
38 spin_lock(&src->i_lock);
39 i_blocks = src->i_blocks;
40 if (sizeof(i_blocks) > sizeof(long))
41 spin_unlock(&src->i_lock);
42
43 /*
44 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
45 * fsstack_copy_inode_size() to hold some lock around
46 * i_size_write(), otherwise i_size_read() may spin forever (see
47 * include/linux/fs.h). We don't necessarily hold i_mutex when this
48 * is called, so take i_lock for that case.
49 *
50 * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the
51 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
52 * for that case too, and do both at once by combining the tests.
53 *
54 * There is none of this locking overhead in the 64-bit case.
55 */
56 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
57 spin_lock(&dst->i_lock);
58 i_size_write(dst, i_size);
59 dst->i_blocks = i_blocks;
60 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
61 spin_unlock(&dst->i_lock);
14} 62}
15EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); 63EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
16 64
17/* copy all attributes; get_nlinks is optional way to override the i_nlink 65/* copy all attributes */
18 * copying 66void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
19 */
20void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
21 int (*get_nlinks)(struct inode *))
22{ 67{
23 dest->i_mode = src->i_mode; 68 dest->i_mode = src->i_mode;
24 dest->i_uid = src->i_uid; 69 dest->i_uid = src->i_uid;
@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
29 dest->i_ctime = src->i_ctime; 74 dest->i_ctime = src->i_ctime;
30 dest->i_blkbits = src->i_blkbits; 75 dest->i_blkbits = src->i_blkbits;
31 dest->i_flags = src->i_flags; 76 dest->i_flags = src->i_flags;
32 77 dest->i_nlink = src->i_nlink;
33 /*
34 * Update the nlinks AFTER updating the above fields, because the
35 * get_links callback may depend on them.
36 */
37 if (!get_nlinks)
38 dest->i_nlink = src->i_nlink;
39 else
40 dest->i_nlink = (*get_nlinks)(dest);
41} 78}
42EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 79EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/stat.c b/fs/stat.c
index 075694e31d8b..c4ecd52c5737 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
401} 401}
402#endif /* __ARCH_WANT_STAT64 */ 402#endif /* __ARCH_WANT_STAT64 */
403 403
404void inode_add_bytes(struct inode *inode, loff_t bytes) 404/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
405void __inode_add_bytes(struct inode *inode, loff_t bytes)
405{ 406{
406 spin_lock(&inode->i_lock);
407 inode->i_blocks += bytes >> 9; 407 inode->i_blocks += bytes >> 9;
408 bytes &= 511; 408 bytes &= 511;
409 inode->i_bytes += bytes; 409 inode->i_bytes += bytes;
@@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
411 inode->i_blocks++; 411 inode->i_blocks++;
412 inode->i_bytes -= 512; 412 inode->i_bytes -= 512;
413 } 413 }
414}
415
416void inode_add_bytes(struct inode *inode, loff_t bytes)
417{
418 spin_lock(&inode->i_lock);
419 __inode_add_bytes(inode, bytes);
414 spin_unlock(&inode->i_lock); 420 spin_unlock(&inode->i_lock);
415} 421}
416 422
diff --git a/fs/super.c b/fs/super.c
index 19eb70b374bc..aff046b0fe78 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -901,8 +901,9 @@ int get_sb_single(struct file_system_type *fs_type,
901 return error; 901 return error;
902 } 902 }
903 s->s_flags |= MS_ACTIVE; 903 s->s_flags |= MS_ACTIVE;
904 } else {
905 do_remount_sb(s, flags, data, 0);
904 } 906 }
905 do_remount_sb(s, flags, data, 0);
906 simple_set_mnt(mnt, s); 907 simple_set_mnt(mnt, s);
907 return 0; 908 return 0;
908} 909}
diff --git a/fs/sync.c b/fs/sync.c
index 36752a683481..418727a2a239 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
355{ 355{
356 int ret; 356 int ret;
357 struct file *file; 357 struct file *file;
358 struct address_space *mapping;
358 loff_t endbyte; /* inclusive */ 359 loff_t endbyte; /* inclusive */
359 int fput_needed; 360 int fput_needed;
360 umode_t i_mode; 361 umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
405 !S_ISLNK(i_mode)) 406 !S_ISLNK(i_mode))
406 goto out_put; 407 goto out_put;
407 408
408 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); 409 mapping = file->f_mapping;
410 if (!mapping) {
411 ret = -EINVAL;
412 goto out_put;
413 }
414
415 ret = 0;
416 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
417 ret = filemap_fdatawait_range(mapping, offset, endbyte);
418 if (ret < 0)
419 goto out_put;
420 }
421
422 if (flags & SYNC_FILE_RANGE_WRITE) {
423 ret = filemap_fdatawrite_range(mapping, offset, endbyte);
424 if (ret < 0)
425 goto out_put;
426 }
427
428 if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
429 ret = filemap_fdatawait_range(mapping, offset, endbyte);
430
409out_put: 431out_put:
410 fput_light(file, fput_needed); 432 fput_light(file, fput_needed);
411out: 433out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
437} 459}
438SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); 460SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
439#endif 461#endif
440
441/*
442 * `endbyte' is inclusive
443 */
444int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
445 loff_t endbyte, unsigned int flags)
446{
447 int ret;
448
449 if (!mapping) {
450 ret = -EINVAL;
451 goto out;
452 }
453
454 ret = 0;
455 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
456 ret = filemap_fdatawait_range(mapping, offset, endbyte);
457 if (ret < 0)
458 goto out;
459 }
460
461 if (flags & SYNC_FILE_RANGE_WRITE) {
462 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
463 WB_SYNC_ALL);
464 if (ret < 0)
465 goto out;
466 }
467
468 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
469 ret = filemap_fdatawait_range(mapping, offset, endbyte);
470 }
471out:
472 return ret;
473}
474EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 60c702bc10ae..a0a500af24a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -483,7 +483,8 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd)
483 * @attr: attribute descriptor. 483 * @attr: attribute descriptor.
484 */ 484 */
485 485
486int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) 486int sysfs_create_bin_file(struct kobject *kobj,
487 const struct bin_attribute *attr)
487{ 488{
488 BUG_ON(!kobj || !kobj->sd || !attr); 489 BUG_ON(!kobj || !kobj->sd || !attr);
489 490
@@ -497,7 +498,8 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
497 * @attr: attribute descriptor. 498 * @attr: attribute descriptor.
498 */ 499 */
499 500
500void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) 501void sysfs_remove_bin_file(struct kobject *kobj,
502 const struct bin_attribute *attr)
501{ 503{
502 sysfs_hash_and_remove(kobj->sd, attr->attr.name); 504 sysfs_hash_and_remove(kobj->sd, attr->attr.name);
503} 505}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f05f2303a8b8..699f371b9f12 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -106,8 +106,10 @@ static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
106 return NULL; 106 return NULL;
107 107
108 t = atomic_cmpxchg(&sd->s_active, v, v + 1); 108 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
109 if (likely(t == v)) 109 if (likely(t == v)) {
110 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
110 return sd; 111 return sd;
112 }
111 if (t < 0) 113 if (t < 0)
112 return NULL; 114 return NULL;
113 115
@@ -130,6 +132,7 @@ static void sysfs_put_active(struct sysfs_dirent *sd)
130 if (unlikely(!sd)) 132 if (unlikely(!sd))
131 return; 133 return;
132 134
135 rwsem_release(&sd->dep_map, 1, _RET_IP_);
133 v = atomic_dec_return(&sd->s_active); 136 v = atomic_dec_return(&sd->s_active);
134 if (likely(v != SD_DEACTIVATED_BIAS)) 137 if (likely(v != SD_DEACTIVATED_BIAS))
135 return; 138 return;
@@ -194,15 +197,21 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
194 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); 197 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
195 sd->s_sibling = (void *)&wait; 198 sd->s_sibling = (void *)&wait;
196 199
200 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
197 /* atomic_add_return() is a mb(), put_active() will always see 201 /* atomic_add_return() is a mb(), put_active() will always see
198 * the updated sd->s_sibling. 202 * the updated sd->s_sibling.
199 */ 203 */
200 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 204 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
201 205
202 if (v != SD_DEACTIVATED_BIAS) 206 if (v != SD_DEACTIVATED_BIAS) {
207 lock_contended(&sd->dep_map, _RET_IP_);
203 wait_for_completion(&wait); 208 wait_for_completion(&wait);
209 }
204 210
205 sd->s_sibling = NULL; 211 sd->s_sibling = NULL;
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
206} 215}
207 216
208static int sysfs_alloc_ino(ino_t *pino) 217static int sysfs_alloc_ino(ino_t *pino)
@@ -345,6 +354,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
345 354
346 atomic_set(&sd->s_count, 1); 355 atomic_set(&sd->s_count, 1);
347 atomic_set(&sd->s_active, 0); 356 atomic_set(&sd->s_active, 0);
357 sysfs_dirent_init_lockdep(sd);
348 358
349 sd->s_name = name; 359 sd->s_name = name;
350 sd->s_mode = mode; 360 sd->s_mode = mode;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ca52e7b9d8f8..cdd9377a6e06 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,7 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h>
11#include <linux/fs.h> 12#include <linux/fs.h>
12 13
13struct sysfs_open_dirent; 14struct sysfs_open_dirent;
@@ -50,6 +51,9 @@ struct sysfs_inode_attrs {
50struct sysfs_dirent { 51struct sysfs_dirent {
51 atomic_t s_count; 52 atomic_t s_count;
52 atomic_t s_active; 53 atomic_t s_active;
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55 struct lockdep_map dep_map;
56#endif
53 struct sysfs_dirent *s_parent; 57 struct sysfs_dirent *s_parent;
54 struct sysfs_dirent *s_sibling; 58 struct sysfs_dirent *s_sibling;
55 const char *s_name; 59 const char *s_name;
@@ -84,6 +88,17 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
84 return sd->s_flags & SYSFS_TYPE_MASK; 88 return sd->s_flags & SYSFS_TYPE_MASK;
85} 89}
86 90
91#ifdef CONFIG_DEBUG_LOCK_ALLOC
92#define sysfs_dirent_init_lockdep(sd) \
93do { \
94 static struct lock_class_key __key; \
95 \
96 lockdep_init_map(&sd->dep_map, "s_active", &__key, 0); \
97} while(0)
98#else
99#define sysfs_dirent_init_lockdep(sd) do {} while(0)
100#endif
101
87/* 102/*
88 * Context structure to be used while adding/removing nodes. 103 * Context structure to be used while adding/removing nodes.
89 */ 104 */
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b042bd7034b1..1bfc95ad5f71 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -200,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201 201
202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
203 flags & TFD_SHARED_FCNTL_FLAGS); 203 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
204 if (ufd < 0) 204 if (ufd < 0)
205 kfree(ctx); 205 kfree(ctx);
206 206
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 8a771c59ac3e..90492327b383 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -350,13 +350,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
350 le32_to_cpu(sup->fmt_version)); 350 le32_to_cpu(sup->fmt_version));
351 printk(KERN_DEBUG "\ttime_gran %u\n", 351 printk(KERN_DEBUG "\ttime_gran %u\n",
352 le32_to_cpu(sup->time_gran)); 352 le32_to_cpu(sup->time_gran));
353 printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" 353 printk(KERN_DEBUG "\tUUID %pUB\n",
354 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", 354 sup->uuid);
355 sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3],
356 sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7],
357 sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11],
358 sup->uuid[12], sup->uuid[13], sup->uuid[14],
359 sup->uuid[15]);
360 break; 355 break;
361 } 356 }
362 case UBIFS_MST_NODE: 357 case UBIFS_MST_NODE:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e72..16a6444330ec 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
45 * 45 *
46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the 46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> 47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
48 * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not 48 * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
49 * set as well. However, UBIFS disables readahead. 49 * set as well. However, UBIFS disables readahead.
50 */ 50 */
51 51
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
54 */ 54 */
55 55
56#include <linux/pagemap.h> 56#include <linux/pagemap.h>
57#include <linux/list_sort.h>
57#include "ubifs.h" 58#include "ubifs.h"
58 59
59/* 60/*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
108} 109}
109 110
110/** 111/**
111 * list_sort - sort a list.
112 * @priv: private data, passed to @cmp
113 * @head: the list to sort
114 * @cmp: the elements comparison function
115 *
116 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
117 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
118 * in ascending order.
119 *
120 * The comparison function @cmp is supposed to return a negative value if @a is
121 * than @b, and a positive value if @a is greater than @b. If @a and @b are
122 * equivalent, then it does not matter what this function returns.
123 */
124static void list_sort(void *priv, struct list_head *head,
125 int (*cmp)(void *priv, struct list_head *a,
126 struct list_head *b))
127{
128 struct list_head *p, *q, *e, *list, *tail, *oldhead;
129 int insize, nmerges, psize, qsize, i;
130
131 if (list_empty(head))
132 return;
133
134 list = head->next;
135 list_del(head);
136 insize = 1;
137 for (;;) {
138 p = oldhead = list;
139 list = tail = NULL;
140 nmerges = 0;
141
142 while (p) {
143 nmerges++;
144 q = p;
145 psize = 0;
146 for (i = 0; i < insize; i++) {
147 psize++;
148 q = q->next == oldhead ? NULL : q->next;
149 if (!q)
150 break;
151 }
152
153 qsize = insize;
154 while (psize > 0 || (qsize > 0 && q)) {
155 if (!psize) {
156 e = q;
157 q = q->next;
158 qsize--;
159 if (q == oldhead)
160 q = NULL;
161 } else if (!qsize || !q) {
162 e = p;
163 p = p->next;
164 psize--;
165 if (p == oldhead)
166 p = NULL;
167 } else if (cmp(priv, p, q) <= 0) {
168 e = p;
169 p = p->next;
170 psize--;
171 if (p == oldhead)
172 p = NULL;
173 } else {
174 e = q;
175 q = q->next;
176 qsize--;
177 if (q == oldhead)
178 q = NULL;
179 }
180 if (tail)
181 tail->next = e;
182 else
183 list = e;
184 e->prev = tail;
185 tail = e;
186 }
187 p = q;
188 }
189
190 tail->next = list;
191 list->prev = tail;
192
193 if (nmerges <= 1)
194 break;
195
196 insize *= 2;
197 }
198
199 head->next = list;
200 head->prev = list->prev;
201 list->prev->next = head;
202 list->prev = head;
203}
204
205/**
206 * data_nodes_cmp - compare 2 data nodes. 112 * data_nodes_cmp - compare 2 data nodes.
207 * @priv: UBIFS file-system description object 113 * @priv: UBIFS file-system description object
208 * @a: first data node 114 * @a: first data node
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 943ad5624530..43f9d19a6f33 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1393,12 +1393,7 @@ static int mount_ubifs(struct ubifs_info *c)
1393 c->leb_size, c->leb_size >> 10); 1393 c->leb_size, c->leb_size >> 10);
1394 dbg_msg("data journal heads: %d", 1394 dbg_msg("data journal heads: %d",
1395 c->jhead_cnt - NONDATA_JHEADS_CNT); 1395 c->jhead_cnt - NONDATA_JHEADS_CNT);
1396 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" 1396 dbg_msg("UUID: %pUB", c->uuid);
1397 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
1398 c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],
1399 c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
1400 c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
1401 c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
1402 dbg_msg("big_lpt %d", c->big_lpt); 1397 dbg_msg("big_lpt %d", c->big_lpt);
1403 dbg_msg("log LEBs: %d (%d - %d)", 1398 dbg_msg("log LEBs: %d (%d - %d)",
1404 c->log_lebs, UBIFS_LOG_LNUM, c->log_last); 1399 c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 6f671f1ac271..22af68f8b682 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -70,13 +70,13 @@ static inline unsigned long ufs_dir_pages(struct inode *inode)
70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; 70 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
71} 71}
72 72
73ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry) 73ino_t ufs_inode_by_name(struct inode *dir, struct qstr *qstr)
74{ 74{
75 ino_t res = 0; 75 ino_t res = 0;
76 struct ufs_dir_entry *de; 76 struct ufs_dir_entry *de;
77 struct page *page; 77 struct page *page;
78 78
79 de = ufs_find_entry(dir, dentry, &page); 79 de = ufs_find_entry(dir, qstr, &page);
80 if (de) { 80 if (de) {
81 res = fs32_to_cpu(dir->i_sb, de->d_ino); 81 res = fs32_to_cpu(dir->i_sb, de->d_ino);
82 ufs_put_page(page); 82 ufs_put_page(page);
@@ -249,12 +249,12 @@ struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
249 * (as a parameter - res_dir). Page is returned mapped and unlocked. 249 * (as a parameter - res_dir). Page is returned mapped and unlocked.
250 * Entry is guaranteed to be valid. 250 * Entry is guaranteed to be valid.
251 */ 251 */
252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct dentry *dentry, 252struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct qstr *qstr,
253 struct page **res_page) 253 struct page **res_page)
254{ 254{
255 struct super_block *sb = dir->i_sb; 255 struct super_block *sb = dir->i_sb;
256 const char *name = dentry->d_name.name; 256 const char *name = qstr->name;
257 int namelen = dentry->d_name.len; 257 int namelen = qstr->len;
258 unsigned reclen = UFS_DIR_REC_LEN(namelen); 258 unsigned reclen = UFS_DIR_REC_LEN(namelen);
259 unsigned long start, n; 259 unsigned long start, n;
260 unsigned long npages = ufs_dir_pages(dir); 260 unsigned long npages = ufs_dir_pages(dir);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 23119fe7ad62..4c26d9e8bc94 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -56,7 +56,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
56 return ERR_PTR(-ENAMETOOLONG); 56 return ERR_PTR(-ENAMETOOLONG);
57 57
58 lock_kernel(); 58 lock_kernel();
59 ino = ufs_inode_by_name(dir, dentry); 59 ino = ufs_inode_by_name(dir, &dentry->d_name);
60 if (ino) { 60 if (ino) {
61 inode = ufs_iget(dir->i_sb, ino); 61 inode = ufs_iget(dir->i_sb, ino);
62 if (IS_ERR(inode)) { 62 if (IS_ERR(inode)) {
@@ -237,7 +237,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
237 struct page *page; 237 struct page *page;
238 int err = -ENOENT; 238 int err = -ENOENT;
239 239
240 de = ufs_find_entry(dir, dentry, &page); 240 de = ufs_find_entry(dir, &dentry->d_name, &page);
241 if (!de) 241 if (!de)
242 goto out; 242 goto out;
243 243
@@ -281,7 +281,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
281 struct ufs_dir_entry *old_de; 281 struct ufs_dir_entry *old_de;
282 int err = -ENOENT; 282 int err = -ENOENT;
283 283
284 old_de = ufs_find_entry(old_dir, old_dentry, &old_page); 284 old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
285 if (!old_de) 285 if (!old_de)
286 goto out; 286 goto out;
287 287
@@ -301,7 +301,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
301 goto out_dir; 301 goto out_dir;
302 302
303 err = -ENOENT; 303 err = -ENOENT;
304 new_de = ufs_find_entry(new_dir, new_dentry, &new_page); 304 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
305 if (!new_de) 305 if (!new_de)
306 goto out_dir; 306 goto out_dir;
307 inode_inc_link_count(old_inode); 307 inode_inc_link_count(old_inode);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 5faed7954d0a..143c20bfb04b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -66,6 +66,7 @@
66 */ 66 */
67 67
68 68
69#include <linux/exportfs.h>
69#include <linux/module.h> 70#include <linux/module.h>
70#include <linux/bitops.h> 71#include <linux/bitops.h>
71 72
@@ -96,6 +97,56 @@
96#include "swab.h" 97#include "swab.h"
97#include "util.h" 98#include "util.h"
98 99
100static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
101{
102 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
103 struct inode *inode;
104
105 if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg)
106 return ERR_PTR(-ESTALE);
107
108 inode = ufs_iget(sb, ino);
109 if (IS_ERR(inode))
110 return ERR_CAST(inode);
111 if (generation && inode->i_generation != generation) {
112 iput(inode);
113 return ERR_PTR(-ESTALE);
114 }
115 return inode;
116}
117
118static struct dentry *ufs_fh_to_dentry(struct super_block *sb, struct fid *fid,
119 int fh_len, int fh_type)
120{
121 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, ufs_nfs_get_inode);
122}
123
124static struct dentry *ufs_fh_to_parent(struct super_block *sb, struct fid *fid,
125 int fh_len, int fh_type)
126{
127 return generic_fh_to_parent(sb, fid, fh_len, fh_type, ufs_nfs_get_inode);
128}
129
130static struct dentry *ufs_get_parent(struct dentry *child)
131{
132 struct qstr dot_dot = {
133 .name = "..",
134 .len = 2,
135 };
136 ino_t ino;
137
138 ino = ufs_inode_by_name(child->d_inode, &dot_dot);
139 if (!ino)
140 return ERR_PTR(-ENOENT);
141 return d_obtain_alias(ufs_iget(child->d_inode->i_sb, ino));
142}
143
144static const struct export_operations ufs_export_ops = {
145 .fh_to_dentry = ufs_fh_to_dentry,
146 .fh_to_parent = ufs_fh_to_parent,
147 .get_parent = ufs_get_parent,
148};
149
99#ifdef CONFIG_UFS_DEBUG 150#ifdef CONFIG_UFS_DEBUG
100/* 151/*
101 * Print contents of ufs_super_block, useful for debugging 152 * Print contents of ufs_super_block, useful for debugging
@@ -990,6 +1041,7 @@ magic_found:
990 * Read ufs_super_block into internal data structures 1041 * Read ufs_super_block into internal data structures
991 */ 1042 */
992 sb->s_op = &ufs_super_ops; 1043 sb->s_op = &ufs_super_ops;
1044 sb->s_export_op = &ufs_export_ops;
993 sb->dq_op = NULL; /***/ 1045 sb->dq_op = NULL; /***/
994 sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic); 1046 sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic);
995 1047
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 644e77e13599..0b4c39bc0d9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -86,9 +86,9 @@ extern void ufs_put_cylinder (struct super_block *, unsigned);
86/* dir.c */ 86/* dir.c */
87extern const struct inode_operations ufs_dir_inode_operations; 87extern const struct inode_operations ufs_dir_inode_operations;
88extern int ufs_add_link (struct dentry *, struct inode *); 88extern int ufs_add_link (struct dentry *, struct inode *);
89extern ino_t ufs_inode_by_name(struct inode *, struct dentry *); 89extern ino_t ufs_inode_by_name(struct inode *, struct qstr *);
90extern int ufs_make_empty(struct inode *, struct inode *); 90extern int ufs_make_empty(struct inode *, struct inode *);
91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct dentry *, struct page **); 91extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct qstr *, struct page **);
92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *); 92extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *);
93extern int ufs_empty_dir (struct inode *); 93extern int ufs_empty_dir (struct inode *);
94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **); 94extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
diff --git a/fs/xattr.c b/fs/xattr.c
index 6d4f6d3449fb..46f87e828b48 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -615,12 +615,11 @@ ssize_t
615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) 615generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
616{ 616{
617 struct xattr_handler *handler; 617 struct xattr_handler *handler;
618 struct inode *inode = dentry->d_inode;
619 618
620 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 619 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
621 if (!handler) 620 if (!handler)
622 return -EOPNOTSUPP; 621 return -EOPNOTSUPP;
623 return handler->get(inode, name, buffer, size); 622 return handler->get(dentry, name, buffer, size, handler->flags);
624} 623}
625 624
626/* 625/*
@@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
630ssize_t 629ssize_t
631generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 630generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
632{ 631{
633 struct inode *inode = dentry->d_inode; 632 struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
634 struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr;
635 unsigned int size = 0; 633 unsigned int size = 0;
636 634
637 if (!buffer) { 635 if (!buffer) {
638 for_each_xattr_handler(handlers, handler) 636 for_each_xattr_handler(handlers, handler) {
639 size += handler->list(inode, NULL, 0, NULL, 0); 637 size += handler->list(dentry, NULL, 0, NULL, 0,
638 handler->flags);
639 }
640 } else { 640 } else {
641 char *buf = buffer; 641 char *buf = buffer;
642 642
643 for_each_xattr_handler(handlers, handler) { 643 for_each_xattr_handler(handlers, handler) {
644 size = handler->list(inode, buf, buffer_size, NULL, 0); 644 size = handler->list(dentry, buf, buffer_size,
645 NULL, 0, handler->flags);
645 if (size > buffer_size) 646 if (size > buffer_size)
646 return -ERANGE; 647 return -ERANGE;
647 buf += size; 648 buf += size;
@@ -659,14 +660,13 @@ int
659generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) 660generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
660{ 661{
661 struct xattr_handler *handler; 662 struct xattr_handler *handler;
662 struct inode *inode = dentry->d_inode;
663 663
664 if (size == 0) 664 if (size == 0)
665 value = ""; /* empty EA, do not remove */ 665 value = ""; /* empty EA, do not remove */
666 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
667 if (!handler) 667 if (!handler)
668 return -EOPNOTSUPP; 668 return -EOPNOTSUPP;
669 return handler->set(inode, name, value, size, flags); 669 return handler->set(dentry, name, value, size, 0, handler->flags);
670} 670}
671 671
672/* 672/*
@@ -677,12 +677,12 @@ int
677generic_removexattr(struct dentry *dentry, const char *name) 677generic_removexattr(struct dentry *dentry, const char *name)
678{ 678{
679 struct xattr_handler *handler; 679 struct xattr_handler *handler;
680 struct inode *inode = dentry->d_inode;
681 680
682 handler = xattr_resolve_name(inode->i_sb->s_xattr, &name); 681 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
683 if (!handler) 682 if (!handler)
684 return -EOPNOTSUPP; 683 return -EOPNOTSUPP;
685 return handler->set(inode, name, NULL, 0, XATTR_REPLACE); 684 return handler->set(dentry, name, NULL, 0,
685 XATTR_REPLACE, handler->flags);
686} 686}
687 687
688EXPORT_SYMBOL(generic_getxattr); 688EXPORT_SYMBOL(generic_getxattr);
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7a59daed1782..56641fe52a23 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -26,6 +26,8 @@ endif
26 26
27obj-$(CONFIG_XFS_FS) += xfs.o 27obj-$(CONFIG_XFS_FS) += xfs.o
28 28
29xfs-y += linux-2.6/xfs_trace.o
30
29xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ 31xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
30 xfs_dquot.o \ 32 xfs_dquot.o \
31 xfs_dquot_item.o \ 33 xfs_dquot_item.o \
@@ -90,8 +92,7 @@ xfs-y += xfs_alloc.o \
90 xfs_rw.o \ 92 xfs_rw.o \
91 xfs_dmops.o 93 xfs_dmops.o
92 94
93xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ 95xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
94 xfs_dir2_trace.o
95 96
96# Objects in linux/ 97# Objects in linux/
97xfs-y += $(addprefix $(XFS_LINUX)/, \ 98xfs-y += $(addprefix $(XFS_LINUX)/, \
@@ -113,6 +114,3 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
113xfs-y += $(addprefix support/, \ 114xfs-y += $(addprefix support/, \
114 debug.o \ 115 debug.o \
115 uuid.o) 116 uuid.o)
116
117xfs-$(CONFIG_XFS_TRACE) += support/ktrace.o
118
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b23a54506446..883ca5ab8af5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -21,6 +21,7 @@
21#include "xfs_bmap_btree.h" 21#include "xfs_bmap_btree.h"
22#include "xfs_inode.h" 22#include "xfs_inode.h"
23#include "xfs_vnodeops.h" 23#include "xfs_vnodeops.h"
24#include "xfs_trace.h"
24#include <linux/xattr.h> 25#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h> 26#include <linux/posix_acl_xattr.h>
26 27
@@ -250,8 +251,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
250 if (mode != inode->i_mode) { 251 if (mode != inode->i_mode) {
251 struct iattr iattr; 252 struct iattr iattr;
252 253
253 iattr.ia_valid = ATTR_MODE; 254 iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
254 iattr.ia_mode = mode; 255 iattr.ia_mode = mode;
256 iattr.ia_ctime = current_fs_time(inode->i_sb);
255 257
256 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 258 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
257 } 259 }
@@ -353,37 +355,14 @@ xfs_acl_chmod(struct inode *inode)
353 return error; 355 return error;
354} 356}
355 357
356/*
357 * System xattr handlers.
358 *
359 * Currently Posix ACLs are the only system namespace extended attribute
360 * handlers supported by XFS, so we just implement the handlers here.
361 * If we ever support other system extended attributes this will need
362 * some refactoring.
363 */
364
365static int 358static int
366xfs_decode_acl(const char *name) 359xfs_xattr_acl_get(struct dentry *dentry, const char *name,
367{ 360 void *value, size_t size, int type)
368 if (strcmp(name, "posix_acl_access") == 0)
369 return ACL_TYPE_ACCESS;
370 else if (strcmp(name, "posix_acl_default") == 0)
371 return ACL_TYPE_DEFAULT;
372 return -EINVAL;
373}
374
375static int
376xfs_xattr_system_get(struct inode *inode, const char *name,
377 void *value, size_t size)
378{ 361{
379 struct posix_acl *acl; 362 struct posix_acl *acl;
380 int type, error; 363 int error;
381
382 type = xfs_decode_acl(name);
383 if (type < 0)
384 return type;
385 364
386 acl = xfs_get_acl(inode, type); 365 acl = xfs_get_acl(dentry->d_inode, type);
387 if (IS_ERR(acl)) 366 if (IS_ERR(acl))
388 return PTR_ERR(acl); 367 return PTR_ERR(acl);
389 if (acl == NULL) 368 if (acl == NULL)
@@ -396,15 +375,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
396} 375}
397 376
398static int 377static int
399xfs_xattr_system_set(struct inode *inode, const char *name, 378xfs_xattr_acl_set(struct dentry *dentry, const char *name,
400 const void *value, size_t size, int flags) 379 const void *value, size_t size, int flags, int type)
401{ 380{
381 struct inode *inode = dentry->d_inode;
402 struct posix_acl *acl = NULL; 382 struct posix_acl *acl = NULL;
403 int error = 0, type; 383 int error = 0;
404 384
405 type = xfs_decode_acl(name);
406 if (type < 0)
407 return type;
408 if (flags & XATTR_CREATE) 385 if (flags & XATTR_CREATE)
409 return -EINVAL; 386 return -EINVAL;
410 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 387 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -461,8 +438,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
461 return error; 438 return error;
462} 439}
463 440
464struct xattr_handler xfs_xattr_system_handler = { 441struct xattr_handler xfs_xattr_acl_access_handler = {
465 .prefix = XATTR_SYSTEM_PREFIX, 442 .prefix = POSIX_ACL_XATTR_ACCESS,
466 .get = xfs_xattr_system_get, 443 .flags = ACL_TYPE_ACCESS,
467 .set = xfs_xattr_system_set, 444 .get = xfs_xattr_acl_get,
445 .set = xfs_xattr_acl_set,
446};
447
448struct xattr_handler xfs_xattr_acl_default_handler = {
449 .prefix = POSIX_ACL_XATTR_DEFAULT,
450 .flags = ACL_TYPE_DEFAULT,
451 .get = xfs_xattr_acl_get,
452 .set = xfs_xattr_acl_set,
468}; 453};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 87813e405cef..66abe36c1213 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,6 +38,7 @@
38#include "xfs_rw.h" 38#include "xfs_rw.h"
39#include "xfs_iomap.h" 39#include "xfs_iomap.h"
40#include "xfs_vnodeops.h" 40#include "xfs_vnodeops.h"
41#include "xfs_trace.h"
41#include <linux/mpage.h> 42#include <linux/mpage.h>
42#include <linux/pagevec.h> 43#include <linux/pagevec.h>
43#include <linux/writeback.h> 44#include <linux/writeback.h>
@@ -76,7 +77,7 @@ xfs_ioend_wake(
76 wake_up(to_ioend_wq(ip)); 77 wake_up(to_ioend_wq(ip));
77} 78}
78 79
79STATIC void 80void
80xfs_count_page_state( 81xfs_count_page_state(
81 struct page *page, 82 struct page *page,
82 int *delalloc, 83 int *delalloc,
@@ -98,48 +99,6 @@ xfs_count_page_state(
98 } while ((bh = bh->b_this_page) != head); 99 } while ((bh = bh->b_this_page) != head);
99} 100}
100 101
101#if defined(XFS_RW_TRACE)
102void
103xfs_page_trace(
104 int tag,
105 struct inode *inode,
106 struct page *page,
107 unsigned long pgoff)
108{
109 xfs_inode_t *ip;
110 loff_t isize = i_size_read(inode);
111 loff_t offset = page_offset(page);
112 int delalloc = -1, unmapped = -1, unwritten = -1;
113
114 if (page_has_buffers(page))
115 xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
116
117 ip = XFS_I(inode);
118 if (!ip->i_rwtrace)
119 return;
120
121 ktrace_enter(ip->i_rwtrace,
122 (void *)((unsigned long)tag),
123 (void *)ip,
124 (void *)inode,
125 (void *)page,
126 (void *)pgoff,
127 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
128 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
129 (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
130 (void *)((unsigned long)(isize & 0xffffffff)),
131 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
132 (void *)((unsigned long)(offset & 0xffffffff)),
133 (void *)((unsigned long)delalloc),
134 (void *)((unsigned long)unmapped),
135 (void *)((unsigned long)unwritten),
136 (void *)((unsigned long)current_pid()),
137 (void *)NULL);
138}
139#else
140#define xfs_page_trace(tag, inode, page, pgoff)
141#endif
142
143STATIC struct block_device * 102STATIC struct block_device *
144xfs_find_bdev_for_inode( 103xfs_find_bdev_for_inode(
145 struct xfs_inode *ip) 104 struct xfs_inode *ip)
@@ -1202,7 +1161,7 @@ xfs_vm_writepage(
1202 int delalloc, unmapped, unwritten; 1161 int delalloc, unmapped, unwritten;
1203 struct inode *inode = page->mapping->host; 1162 struct inode *inode = page->mapping->host;
1204 1163
1205 xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); 1164 trace_xfs_writepage(inode, page, 0);
1206 1165
1207 /* 1166 /*
1208 * We need a transaction if: 1167 * We need a transaction if:
@@ -1307,7 +1266,7 @@ xfs_vm_releasepage(
1307 .nr_to_write = 1, 1266 .nr_to_write = 1,
1308 }; 1267 };
1309 1268
1310 xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0); 1269 trace_xfs_releasepage(inode, page, 0);
1311 1270
1312 if (!page_has_buffers(page)) 1271 if (!page_has_buffers(page))
1313 return 0; 1272 return 0;
@@ -1515,19 +1474,13 @@ xfs_vm_direct_IO(
1515 1474
1516 bdev = xfs_find_bdev_for_inode(XFS_I(inode)); 1475 bdev = xfs_find_bdev_for_inode(XFS_I(inode));
1517 1476
1518 if (rw == WRITE) { 1477 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
1519 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); 1478 IOMAP_UNWRITTEN : IOMAP_READ);
1520 ret = blockdev_direct_IO_own_locking(rw, iocb, inode, 1479
1521 bdev, iov, offset, nr_segs, 1480 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
1522 xfs_get_blocks_direct, 1481 offset, nr_segs,
1523 xfs_end_io_direct); 1482 xfs_get_blocks_direct,
1524 } else { 1483 xfs_end_io_direct);
1525 iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
1526 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
1527 bdev, iov, offset, nr_segs,
1528 xfs_get_blocks_direct,
1529 xfs_end_io_direct);
1530 }
1531 1484
1532 if (unlikely(ret != -EIOCBQUEUED && iocb->private)) 1485 if (unlikely(ret != -EIOCBQUEUED && iocb->private))
1533 xfs_destroy_ioend(iocb->private); 1486 xfs_destroy_ioend(iocb->private);
@@ -1587,8 +1540,7 @@ xfs_vm_invalidatepage(
1587 struct page *page, 1540 struct page *page,
1588 unsigned long offset) 1541 unsigned long offset)
1589{ 1542{
1590 xfs_page_trace(XFS_INVALIDPAGE_ENTER, 1543 trace_xfs_invalidatepage(page->mapping->host, page, offset);
1591 page->mapping->host, page, offset);
1592 block_invalidatepage(page, offset); 1544 block_invalidatepage(page, offset);
1593} 1545}
1594 1546
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 221b3e66ceef..4cfc6ea87df8 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -45,4 +45,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45extern void xfs_ioend_init(void); 45extern void xfs_ioend_init(void);
46extern void xfs_ioend_wait(struct xfs_inode *); 46extern void xfs_ioend_wait(struct xfs_inode *);
47 47
48extern void xfs_count_page_state(struct page *, int *, int *, int *);
49
48#endif /* __XFS_AOPS_H__ */ 50#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4ddc973aea7a..77b8be81c769 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -39,6 +39,7 @@
39#include "xfs_ag.h" 39#include "xfs_ag.h"
40#include "xfs_dmapi.h" 40#include "xfs_dmapi.h"
41#include "xfs_mount.h" 41#include "xfs_mount.h"
42#include "xfs_trace.h"
42 43
43static kmem_zone_t *xfs_buf_zone; 44static kmem_zone_t *xfs_buf_zone;
44STATIC int xfsbufd(void *); 45STATIC int xfsbufd(void *);
@@ -53,34 +54,6 @@ static struct workqueue_struct *xfslogd_workqueue;
53struct workqueue_struct *xfsdatad_workqueue; 54struct workqueue_struct *xfsdatad_workqueue;
54struct workqueue_struct *xfsconvertd_workqueue; 55struct workqueue_struct *xfsconvertd_workqueue;
55 56
56#ifdef XFS_BUF_TRACE
57void
58xfs_buf_trace(
59 xfs_buf_t *bp,
60 char *id,
61 void *data,
62 void *ra)
63{
64 ktrace_enter(xfs_buf_trace_buf,
65 bp, id,
66 (void *)(unsigned long)bp->b_flags,
67 (void *)(unsigned long)bp->b_hold.counter,
68 (void *)(unsigned long)bp->b_sema.count,
69 (void *)current,
70 data, ra,
71 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
72 (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
73 (void *)(unsigned long)bp->b_buffer_length,
74 NULL, NULL, NULL, NULL, NULL);
75}
76ktrace_t *xfs_buf_trace_buf;
77#define XFS_BUF_TRACE_SIZE 4096
78#define XB_TRACE(bp, id, data) \
79 xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
80#else
81#define XB_TRACE(bp, id, data) do { } while (0)
82#endif
83
84#ifdef XFS_BUF_LOCK_TRACKING 57#ifdef XFS_BUF_LOCK_TRACKING
85# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 58# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
86# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) 59# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
@@ -279,7 +252,8 @@ _xfs_buf_initialize(
279 init_waitqueue_head(&bp->b_waiters); 252 init_waitqueue_head(&bp->b_waiters);
280 253
281 XFS_STATS_INC(xb_create); 254 XFS_STATS_INC(xb_create);
282 XB_TRACE(bp, "initialize", target); 255
256 trace_xfs_buf_init(bp, _RET_IP_);
283} 257}
284 258
285/* 259/*
@@ -318,6 +292,7 @@ _xfs_buf_free_pages(
318{ 292{
319 if (bp->b_pages != bp->b_page_array) { 293 if (bp->b_pages != bp->b_page_array) {
320 kmem_free(bp->b_pages); 294 kmem_free(bp->b_pages);
295 bp->b_pages = NULL;
321 } 296 }
322} 297}
323 298
@@ -332,7 +307,7 @@ void
332xfs_buf_free( 307xfs_buf_free(
333 xfs_buf_t *bp) 308 xfs_buf_t *bp)
334{ 309{
335 XB_TRACE(bp, "free", 0); 310 trace_xfs_buf_free(bp, _RET_IP_);
336 311
337 ASSERT(list_empty(&bp->b_hash_list)); 312 ASSERT(list_empty(&bp->b_hash_list));
338 313
@@ -349,9 +324,8 @@ xfs_buf_free(
349 ASSERT(!PagePrivate(page)); 324 ASSERT(!PagePrivate(page));
350 page_cache_release(page); 325 page_cache_release(page);
351 } 326 }
352 _xfs_buf_free_pages(bp);
353 } 327 }
354 328 _xfs_buf_free_pages(bp);
355 xfs_buf_deallocate(bp); 329 xfs_buf_deallocate(bp);
356} 330}
357 331
@@ -445,7 +419,6 @@ _xfs_buf_lookup_pages(
445 if (page_count == bp->b_page_count) 419 if (page_count == bp->b_page_count)
446 bp->b_flags |= XBF_DONE; 420 bp->b_flags |= XBF_DONE;
447 421
448 XB_TRACE(bp, "lookup_pages", (long)page_count);
449 return error; 422 return error;
450} 423}
451 424
@@ -548,7 +521,6 @@ found:
548 if (down_trylock(&bp->b_sema)) { 521 if (down_trylock(&bp->b_sema)) {
549 if (!(flags & XBF_TRYLOCK)) { 522 if (!(flags & XBF_TRYLOCK)) {
550 /* wait for buffer ownership */ 523 /* wait for buffer ownership */
551 XB_TRACE(bp, "get_lock", 0);
552 xfs_buf_lock(bp); 524 xfs_buf_lock(bp);
553 XFS_STATS_INC(xb_get_locked_waited); 525 XFS_STATS_INC(xb_get_locked_waited);
554 } else { 526 } else {
@@ -571,7 +543,8 @@ found:
571 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 543 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
572 bp->b_flags &= XBF_MAPPED; 544 bp->b_flags &= XBF_MAPPED;
573 } 545 }
574 XB_TRACE(bp, "got_lock", 0); 546
547 trace_xfs_buf_find(bp, flags, _RET_IP_);
575 XFS_STATS_INC(xb_get_locked); 548 XFS_STATS_INC(xb_get_locked);
576 return bp; 549 return bp;
577} 550}
@@ -627,7 +600,7 @@ xfs_buf_get(
627 bp->b_bn = ioff; 600 bp->b_bn = ioff;
628 bp->b_count_desired = bp->b_buffer_length; 601 bp->b_count_desired = bp->b_buffer_length;
629 602
630 XB_TRACE(bp, "get", (unsigned long)flags); 603 trace_xfs_buf_get(bp, flags, _RET_IP_);
631 return bp; 604 return bp;
632 605
633 no_buffer: 606 no_buffer:
@@ -644,8 +617,6 @@ _xfs_buf_read(
644{ 617{
645 int status; 618 int status;
646 619
647 XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
648
649 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); 620 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
650 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 621 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
651 622
@@ -673,19 +644,18 @@ xfs_buf_read(
673 644
674 bp = xfs_buf_get(target, ioff, isize, flags); 645 bp = xfs_buf_get(target, ioff, isize, flags);
675 if (bp) { 646 if (bp) {
647 trace_xfs_buf_read(bp, flags, _RET_IP_);
648
676 if (!XFS_BUF_ISDONE(bp)) { 649 if (!XFS_BUF_ISDONE(bp)) {
677 XB_TRACE(bp, "read", (unsigned long)flags);
678 XFS_STATS_INC(xb_get_read); 650 XFS_STATS_INC(xb_get_read);
679 _xfs_buf_read(bp, flags); 651 _xfs_buf_read(bp, flags);
680 } else if (flags & XBF_ASYNC) { 652 } else if (flags & XBF_ASYNC) {
681 XB_TRACE(bp, "read_async", (unsigned long)flags);
682 /* 653 /*
683 * Read ahead call which is already satisfied, 654 * Read ahead call which is already satisfied,
684 * drop the buffer 655 * drop the buffer
685 */ 656 */
686 goto no_buffer; 657 goto no_buffer;
687 } else { 658 } else {
688 XB_TRACE(bp, "read_done", (unsigned long)flags);
689 /* We do not want read in the flags */ 659 /* We do not want read in the flags */
690 bp->b_flags &= ~XBF_READ; 660 bp->b_flags &= ~XBF_READ;
691 } 661 }
@@ -823,7 +793,7 @@ xfs_buf_get_noaddr(
823 793
824 xfs_buf_unlock(bp); 794 xfs_buf_unlock(bp);
825 795
826 XB_TRACE(bp, "no_daddr", len); 796 trace_xfs_buf_get_noaddr(bp, _RET_IP_);
827 return bp; 797 return bp;
828 798
829 fail_free_mem: 799 fail_free_mem:
@@ -845,8 +815,8 @@ void
845xfs_buf_hold( 815xfs_buf_hold(
846 xfs_buf_t *bp) 816 xfs_buf_t *bp)
847{ 817{
818 trace_xfs_buf_hold(bp, _RET_IP_);
848 atomic_inc(&bp->b_hold); 819 atomic_inc(&bp->b_hold);
849 XB_TRACE(bp, "hold", 0);
850} 820}
851 821
852/* 822/*
@@ -859,7 +829,7 @@ xfs_buf_rele(
859{ 829{
860 xfs_bufhash_t *hash = bp->b_hash; 830 xfs_bufhash_t *hash = bp->b_hash;
861 831
862 XB_TRACE(bp, "rele", bp->b_relse); 832 trace_xfs_buf_rele(bp, _RET_IP_);
863 833
864 if (unlikely(!hash)) { 834 if (unlikely(!hash)) {
865 ASSERT(!bp->b_relse); 835 ASSERT(!bp->b_relse);
@@ -909,21 +879,19 @@ xfs_buf_cond_lock(
909 int locked; 879 int locked;
910 880
911 locked = down_trylock(&bp->b_sema) == 0; 881 locked = down_trylock(&bp->b_sema) == 0;
912 if (locked) { 882 if (locked)
913 XB_SET_OWNER(bp); 883 XB_SET_OWNER(bp);
914 } 884
915 XB_TRACE(bp, "cond_lock", (long)locked); 885 trace_xfs_buf_cond_lock(bp, _RET_IP_);
916 return locked ? 0 : -EBUSY; 886 return locked ? 0 : -EBUSY;
917} 887}
918 888
919#if defined(DEBUG) || defined(XFS_BLI_TRACE)
920int 889int
921xfs_buf_lock_value( 890xfs_buf_lock_value(
922 xfs_buf_t *bp) 891 xfs_buf_t *bp)
923{ 892{
924 return bp->b_sema.count; 893 return bp->b_sema.count;
925} 894}
926#endif
927 895
928/* 896/*
929 * Locks a buffer object. 897 * Locks a buffer object.
@@ -935,12 +903,14 @@ void
935xfs_buf_lock( 903xfs_buf_lock(
936 xfs_buf_t *bp) 904 xfs_buf_t *bp)
937{ 905{
938 XB_TRACE(bp, "lock", 0); 906 trace_xfs_buf_lock(bp, _RET_IP_);
907
939 if (atomic_read(&bp->b_io_remaining)) 908 if (atomic_read(&bp->b_io_remaining))
940 blk_run_address_space(bp->b_target->bt_mapping); 909 blk_run_address_space(bp->b_target->bt_mapping);
941 down(&bp->b_sema); 910 down(&bp->b_sema);
942 XB_SET_OWNER(bp); 911 XB_SET_OWNER(bp);
943 XB_TRACE(bp, "locked", 0); 912
913 trace_xfs_buf_lock_done(bp, _RET_IP_);
944} 914}
945 915
946/* 916/*
@@ -962,7 +932,8 @@ xfs_buf_unlock(
962 932
963 XB_CLEAR_OWNER(bp); 933 XB_CLEAR_OWNER(bp);
964 up(&bp->b_sema); 934 up(&bp->b_sema);
965 XB_TRACE(bp, "unlock", 0); 935
936 trace_xfs_buf_unlock(bp, _RET_IP_);
966} 937}
967 938
968 939
@@ -974,17 +945,18 @@ void
974xfs_buf_pin( 945xfs_buf_pin(
975 xfs_buf_t *bp) 946 xfs_buf_t *bp)
976{ 947{
948 trace_xfs_buf_pin(bp, _RET_IP_);
977 atomic_inc(&bp->b_pin_count); 949 atomic_inc(&bp->b_pin_count);
978 XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
979} 950}
980 951
981void 952void
982xfs_buf_unpin( 953xfs_buf_unpin(
983 xfs_buf_t *bp) 954 xfs_buf_t *bp)
984{ 955{
956 trace_xfs_buf_unpin(bp, _RET_IP_);
957
985 if (atomic_dec_and_test(&bp->b_pin_count)) 958 if (atomic_dec_and_test(&bp->b_pin_count))
986 wake_up_all(&bp->b_waiters); 959 wake_up_all(&bp->b_waiters);
987 XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
988} 960}
989 961
990int 962int
@@ -1035,7 +1007,7 @@ xfs_buf_iodone_work(
1035 */ 1007 */
1036 if ((bp->b_error == EOPNOTSUPP) && 1008 if ((bp->b_error == EOPNOTSUPP) &&
1037 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { 1009 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
1038 XB_TRACE(bp, "ordered_retry", bp->b_iodone); 1010 trace_xfs_buf_ordered_retry(bp, _RET_IP_);
1039 bp->b_flags &= ~XBF_ORDERED; 1011 bp->b_flags &= ~XBF_ORDERED;
1040 bp->b_flags |= _XFS_BARRIER_FAILED; 1012 bp->b_flags |= _XFS_BARRIER_FAILED;
1041 xfs_buf_iorequest(bp); 1013 xfs_buf_iorequest(bp);
@@ -1050,12 +1022,12 @@ xfs_buf_ioend(
1050 xfs_buf_t *bp, 1022 xfs_buf_t *bp,
1051 int schedule) 1023 int schedule)
1052{ 1024{
1025 trace_xfs_buf_iodone(bp, _RET_IP_);
1026
1053 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); 1027 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
1054 if (bp->b_error == 0) 1028 if (bp->b_error == 0)
1055 bp->b_flags |= XBF_DONE; 1029 bp->b_flags |= XBF_DONE;
1056 1030
1057 XB_TRACE(bp, "iodone", bp->b_iodone);
1058
1059 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { 1031 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
1060 if (schedule) { 1032 if (schedule) {
1061 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); 1033 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
@@ -1075,7 +1047,7 @@ xfs_buf_ioerror(
1075{ 1047{
1076 ASSERT(error >= 0 && error <= 0xffff); 1048 ASSERT(error >= 0 && error <= 0xffff);
1077 bp->b_error = (unsigned short)error; 1049 bp->b_error = (unsigned short)error;
1078 XB_TRACE(bp, "ioerror", (unsigned long)error); 1050 trace_xfs_buf_ioerror(bp, error, _RET_IP_);
1079} 1051}
1080 1052
1081int 1053int
@@ -1083,7 +1055,7 @@ xfs_bawrite(
1083 void *mp, 1055 void *mp,
1084 struct xfs_buf *bp) 1056 struct xfs_buf *bp)
1085{ 1057{
1086 XB_TRACE(bp, "bawrite", 0); 1058 trace_xfs_buf_bawrite(bp, _RET_IP_);
1087 1059
1088 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 1060 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
1089 1061
@@ -1102,7 +1074,7 @@ xfs_bdwrite(
1102 void *mp, 1074 void *mp,
1103 struct xfs_buf *bp) 1075 struct xfs_buf *bp)
1104{ 1076{
1105 XB_TRACE(bp, "bdwrite", 0); 1077 trace_xfs_buf_bdwrite(bp, _RET_IP_);
1106 1078
1107 bp->b_strat = xfs_bdstrat_cb; 1079 bp->b_strat = xfs_bdstrat_cb;
1108 bp->b_mount = mp; 1080 bp->b_mount = mp;
@@ -1177,10 +1149,14 @@ _xfs_buf_ioapply(
1177 if (bp->b_flags & XBF_ORDERED) { 1149 if (bp->b_flags & XBF_ORDERED) {
1178 ASSERT(!(bp->b_flags & XBF_READ)); 1150 ASSERT(!(bp->b_flags & XBF_READ));
1179 rw = WRITE_BARRIER; 1151 rw = WRITE_BARRIER;
1180 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1152 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1181 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1153 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1182 bp->b_flags &= ~_XBF_RUN_QUEUES; 1154 bp->b_flags &= ~_XBF_RUN_QUEUES;
1183 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1155 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
1156 } else if (bp->b_flags & _XBF_RUN_QUEUES) {
1157 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1158 bp->b_flags &= ~_XBF_RUN_QUEUES;
1159 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
1184 } else { 1160 } else {
1185 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1161 rw = (bp->b_flags & XBF_WRITE) ? WRITE :
1186 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1162 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
@@ -1253,7 +1229,7 @@ int
1253xfs_buf_iorequest( 1229xfs_buf_iorequest(
1254 xfs_buf_t *bp) 1230 xfs_buf_t *bp)
1255{ 1231{
1256 XB_TRACE(bp, "iorequest", 0); 1232 trace_xfs_buf_iorequest(bp, _RET_IP_);
1257 1233
1258 if (bp->b_flags & XBF_DELWRI) { 1234 if (bp->b_flags & XBF_DELWRI) {
1259 xfs_buf_delwri_queue(bp, 1); 1235 xfs_buf_delwri_queue(bp, 1);
@@ -1287,11 +1263,13 @@ int
1287xfs_buf_iowait( 1263xfs_buf_iowait(
1288 xfs_buf_t *bp) 1264 xfs_buf_t *bp)
1289{ 1265{
1290 XB_TRACE(bp, "iowait", 0); 1266 trace_xfs_buf_iowait(bp, _RET_IP_);
1267
1291 if (atomic_read(&bp->b_io_remaining)) 1268 if (atomic_read(&bp->b_io_remaining))
1292 blk_run_address_space(bp->b_target->bt_mapping); 1269 blk_run_address_space(bp->b_target->bt_mapping);
1293 wait_for_completion(&bp->b_iowait); 1270 wait_for_completion(&bp->b_iowait);
1294 XB_TRACE(bp, "iowaited", (long)bp->b_error); 1271
1272 trace_xfs_buf_iowait_done(bp, _RET_IP_);
1295 return bp->b_error; 1273 return bp->b_error;
1296} 1274}
1297 1275
@@ -1604,7 +1582,8 @@ xfs_buf_delwri_queue(
1604 struct list_head *dwq = &bp->b_target->bt_delwrite_queue; 1582 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1605 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; 1583 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1606 1584
1607 XB_TRACE(bp, "delwri_q", (long)unlock); 1585 trace_xfs_buf_delwri_queue(bp, _RET_IP_);
1586
1608 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); 1587 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1609 1588
1610 spin_lock(dwlk); 1589 spin_lock(dwlk);
@@ -1644,7 +1623,7 @@ xfs_buf_delwri_dequeue(
1644 if (dequeued) 1623 if (dequeued)
1645 xfs_buf_rele(bp); 1624 xfs_buf_rele(bp);
1646 1625
1647 XB_TRACE(bp, "delwri_dq", (long)dequeued); 1626 trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
1648} 1627}
1649 1628
1650STATIC void 1629STATIC void
@@ -1692,7 +1671,7 @@ xfs_buf_delwri_split(
1692 INIT_LIST_HEAD(list); 1671 INIT_LIST_HEAD(list);
1693 spin_lock(dwlk); 1672 spin_lock(dwlk);
1694 list_for_each_entry_safe(bp, n, dwq, b_list) { 1673 list_for_each_entry_safe(bp, n, dwq, b_list) {
1695 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); 1674 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1696 ASSERT(bp->b_flags & XBF_DELWRI); 1675 ASSERT(bp->b_flags & XBF_DELWRI);
1697 1676
1698 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1677 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1816,14 +1795,10 @@ xfs_flush_buftarg(
1816int __init 1795int __init
1817xfs_buf_init(void) 1796xfs_buf_init(void)
1818{ 1797{
1819#ifdef XFS_BUF_TRACE
1820 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
1821#endif
1822
1823 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", 1798 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
1824 KM_ZONE_HWALIGN, NULL); 1799 KM_ZONE_HWALIGN, NULL);
1825 if (!xfs_buf_zone) 1800 if (!xfs_buf_zone)
1826 goto out_free_trace_buf; 1801 goto out;
1827 1802
1828 xfslogd_workqueue = create_workqueue("xfslogd"); 1803 xfslogd_workqueue = create_workqueue("xfslogd");
1829 if (!xfslogd_workqueue) 1804 if (!xfslogd_workqueue)
@@ -1846,10 +1821,7 @@ xfs_buf_init(void)
1846 destroy_workqueue(xfslogd_workqueue); 1821 destroy_workqueue(xfslogd_workqueue);
1847 out_free_buf_zone: 1822 out_free_buf_zone:
1848 kmem_zone_destroy(xfs_buf_zone); 1823 kmem_zone_destroy(xfs_buf_zone);
1849 out_free_trace_buf: 1824 out:
1850#ifdef XFS_BUF_TRACE
1851 ktrace_free(xfs_buf_trace_buf);
1852#endif
1853 return -ENOMEM; 1825 return -ENOMEM;
1854} 1826}
1855 1827
@@ -1861,9 +1833,6 @@ xfs_buf_terminate(void)
1861 destroy_workqueue(xfsdatad_workqueue); 1833 destroy_workqueue(xfsdatad_workqueue);
1862 destroy_workqueue(xfslogd_workqueue); 1834 destroy_workqueue(xfslogd_workqueue);
1863 kmem_zone_destroy(xfs_buf_zone); 1835 kmem_zone_destroy(xfs_buf_zone);
1864#ifdef XFS_BUF_TRACE
1865 ktrace_free(xfs_buf_trace_buf);
1866#endif
1867} 1836}
1868 1837
1869#ifdef CONFIG_KDB_MODULES 1838#ifdef CONFIG_KDB_MODULES
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 5f07dd91c5fa..a34c7b54822d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56 XBF_ORDERED = (1 << 11), /* use ordered writes */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
58 59
59 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
60 XBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
@@ -95,6 +96,28 @@ typedef enum {
95 _XFS_BARRIER_FAILED = (1 << 23), 96 _XFS_BARRIER_FAILED = (1 << 23),
96} xfs_buf_flags_t; 97} xfs_buf_flags_t;
97 98
99#define XFS_BUF_FLAGS \
100 { XBF_READ, "READ" }, \
101 { XBF_WRITE, "WRITE" }, \
102 { XBF_MAPPED, "MAPPED" }, \
103 { XBF_ASYNC, "ASYNC" }, \
104 { XBF_DONE, "DONE" }, \
105 { XBF_DELWRI, "DELWRI" }, \
106 { XBF_STALE, "STALE" }, \
107 { XBF_FS_MANAGED, "FS_MANAGED" }, \
108 { XBF_ORDERED, "ORDERED" }, \
109 { XBF_READ_AHEAD, "READ_AHEAD" }, \
110 { XBF_LOCK, "LOCK" }, /* should never be set */\
111 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
112 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
113 { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
114 { _XBF_PAGES, "PAGES" }, \
115 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
116 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
117 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
118 { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
119
120
98typedef enum { 121typedef enum {
99 XBT_FORCE_SLEEP = 0, 122 XBT_FORCE_SLEEP = 0,
100 XBT_FORCE_FLUSH = 1, 123 XBT_FORCE_FLUSH = 1,
@@ -243,13 +266,6 @@ extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
243extern int xfs_buf_init(void); 266extern int xfs_buf_init(void);
244extern void xfs_buf_terminate(void); 267extern void xfs_buf_terminate(void);
245 268
246#ifdef XFS_BUF_TRACE
247extern ktrace_t *xfs_buf_trace_buf;
248extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
249#else
250#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
251#endif
252
253#define xfs_buf_target_name(target) \ 269#define xfs_buf_target_name(target) \
254 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) 270 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
255 271
@@ -365,10 +381,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
365 381
366#define xfs_bpin(bp) xfs_buf_pin(bp) 382#define xfs_bpin(bp) xfs_buf_pin(bp)
367#define xfs_bunpin(bp) xfs_buf_unpin(bp) 383#define xfs_bunpin(bp) xfs_buf_unpin(bp)
368
369#define xfs_buftrace(id, bp) \
370 xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
371
372#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) 384#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
373 385
374#define xfs_biomove(bp, off, len, data, rw) \ 386#define xfs_biomove(bp, off, len, data, rw) \
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 08be36d7326c..7501b85fd860 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -19,6 +19,7 @@
19#include "xfs_vnodeops.h" 19#include "xfs_vnodeops.h"
20#include "xfs_bmap_btree.h" 20#include "xfs_bmap_btree.h"
21#include "xfs_inode.h" 21#include "xfs_inode.h"
22#include "xfs_trace.h"
22 23
23int fs_noerr(void) { return 0; } 24int fs_noerr(void) { return 0; }
24int fs_nosys(void) { return ENOSYS; } 25int fs_nosys(void) { return ENOSYS; }
@@ -51,6 +52,8 @@ xfs_flushinval_pages(
51 struct address_space *mapping = VFS_I(ip)->i_mapping; 52 struct address_space *mapping = VFS_I(ip)->i_mapping;
52 int ret = 0; 53 int ret = 0;
53 54
55 trace_xfs_pagecache_inval(ip, first, last);
56
54 if (mapping->nrpages) { 57 if (mapping->nrpages) {
55 xfs_iflags_clear(ip, XFS_ITRUNCATED); 58 xfs_iflags_clear(ip, XFS_ITRUNCATED);
56 ret = filemap_write_and_wait(mapping); 59 ret = filemap_write_and_wait(mapping);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 5bb523d7f37e..a034cf624437 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -51,6 +51,7 @@
51#include "xfs_quota.h" 51#include "xfs_quota.h"
52#include "xfs_inode_item.h" 52#include "xfs_inode_item.h"
53#include "xfs_export.h" 53#include "xfs_export.h"
54#include "xfs_trace.h"
54 55
55#include <linux/capability.h> 56#include <linux/capability.h>
56#include <linux/dcache.h> 57#include <linux/dcache.h>
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index eafcc7c18706..be1527b1670c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -46,6 +46,7 @@
46#include "xfs_attr.h" 46#include "xfs_attr.h"
47#include "xfs_ioctl.h" 47#include "xfs_ioctl.h"
48#include "xfs_ioctl32.h" 48#include "xfs_ioctl32.h"
49#include "xfs_trace.h"
49 50
50#define _NATIVE_IOC(cmd, type) \ 51#define _NATIVE_IOC(cmd, type) \
51 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) 52 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1f3b4b8f7dd4..225946012d0b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -47,6 +47,7 @@
47#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
50#include "xfs_trace.h"
50 51
51#include <linux/capability.h> 52#include <linux/capability.h>
52#include <linux/xattr.h> 53#include <linux/xattr.h>
@@ -793,7 +794,7 @@ xfs_setup_inode(
793 struct inode *inode = &ip->i_vnode; 794 struct inode *inode = &ip->i_vnode;
794 795
795 inode->i_ino = ip->i_ino; 796 inode->i_ino = ip->i_ino;
796 inode->i_state = I_NEW|I_LOCK; 797 inode->i_state = I_NEW;
797 inode_add_to_lists(ip->i_mount->m_super, inode); 798 inode_add_to_lists(ip->i_mount->m_super, inode);
798 799
799 inode->i_mode = ip->i_d.di_mode; 800 inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 6127e24062d0..5af0c81ca1ae 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -40,7 +40,6 @@
40#include <sv.h> 40#include <sv.h>
41#include <time.h> 41#include <time.h>
42 42
43#include <support/ktrace.h>
44#include <support/debug.h> 43#include <support/debug.h>
45#include <support/uuid.h> 44#include <support/uuid.h>
46 45
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1bf47f219c97..0d32457abef1 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -48,73 +48,12 @@
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_iomap.h" 49#include "xfs_iomap.h"
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_trace.h"
51 52
52#include <linux/capability.h> 53#include <linux/capability.h>
53#include <linux/writeback.h> 54#include <linux/writeback.h>
54 55
55 56
56#if defined(XFS_RW_TRACE)
57void
58xfs_rw_enter_trace(
59 int tag,
60 xfs_inode_t *ip,
61 void *data,
62 size_t segs,
63 loff_t offset,
64 int ioflags)
65{
66 if (ip->i_rwtrace == NULL)
67 return;
68 ktrace_enter(ip->i_rwtrace,
69 (void *)(unsigned long)tag,
70 (void *)ip,
71 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
72 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
73 (void *)data,
74 (void *)((unsigned long)segs),
75 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
76 (void *)((unsigned long)(offset & 0xffffffff)),
77 (void *)((unsigned long)ioflags),
78 (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
79 (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
80 (void *)((unsigned long)current_pid()),
81 (void *)NULL,
82 (void *)NULL,
83 (void *)NULL,
84 (void *)NULL);
85}
86
87void
88xfs_inval_cached_trace(
89 xfs_inode_t *ip,
90 xfs_off_t offset,
91 xfs_off_t len,
92 xfs_off_t first,
93 xfs_off_t last)
94{
95
96 if (ip->i_rwtrace == NULL)
97 return;
98 ktrace_enter(ip->i_rwtrace,
99 (void *)(__psint_t)XFS_INVAL_CACHED,
100 (void *)ip,
101 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
102 (void *)((unsigned long)(offset & 0xffffffff)),
103 (void *)((unsigned long)((len >> 32) & 0xffffffff)),
104 (void *)((unsigned long)(len & 0xffffffff)),
105 (void *)((unsigned long)((first >> 32) & 0xffffffff)),
106 (void *)((unsigned long)(first & 0xffffffff)),
107 (void *)((unsigned long)((last >> 32) & 0xffffffff)),
108 (void *)((unsigned long)(last & 0xffffffff)),
109 (void *)((unsigned long)current_pid()),
110 (void *)NULL,
111 (void *)NULL,
112 (void *)NULL,
113 (void *)NULL,
114 (void *)NULL);
115}
116#endif
117
118/* 57/*
119 * xfs_iozero 58 * xfs_iozero
120 * 59 *
@@ -250,8 +189,7 @@ xfs_read(
250 } 189 }
251 } 190 }
252 191
253 xfs_rw_enter_trace(XFS_READ_ENTER, ip, 192 trace_xfs_file_read(ip, size, *offset, ioflags);
254 (void *)iovp, segs, *offset, ioflags);
255 193
256 iocb->ki_pos = *offset; 194 iocb->ki_pos = *offset;
257 ret = generic_file_aio_read(iocb, iovp, segs, *offset); 195 ret = generic_file_aio_read(iocb, iovp, segs, *offset);
@@ -292,8 +230,9 @@ xfs_splice_read(
292 return -error; 230 return -error;
293 } 231 }
294 } 232 }
295 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, 233
296 pipe, count, *ppos, ioflags); 234 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
235
297 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 236 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
298 if (ret > 0) 237 if (ret > 0)
299 XFS_STATS_ADD(xs_read_bytes, ret); 238 XFS_STATS_ADD(xs_read_bytes, ret);
@@ -342,8 +281,8 @@ xfs_splice_write(
342 ip->i_new_size = new_size; 281 ip->i_new_size = new_size;
343 xfs_iunlock(ip, XFS_ILOCK_EXCL); 282 xfs_iunlock(ip, XFS_ILOCK_EXCL);
344 283
345 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, 284 trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
346 pipe, count, *ppos, ioflags); 285
347 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); 286 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
348 if (ret > 0) 287 if (ret > 0)
349 XFS_STATS_ADD(xs_write_bytes, ret); 288 XFS_STATS_ADD(xs_write_bytes, ret);
@@ -710,8 +649,6 @@ start:
710 if ((ioflags & IO_ISDIRECT)) { 649 if ((ioflags & IO_ISDIRECT)) {
711 if (mapping->nrpages) { 650 if (mapping->nrpages) {
712 WARN_ON(need_i_mutex == 0); 651 WARN_ON(need_i_mutex == 0);
713 xfs_inval_cached_trace(xip, pos, -1,
714 (pos & PAGE_CACHE_MASK), -1);
715 error = xfs_flushinval_pages(xip, 652 error = xfs_flushinval_pages(xip,
716 (pos & PAGE_CACHE_MASK), 653 (pos & PAGE_CACHE_MASK),
717 -1, FI_REMAPF_LOCKED); 654 -1, FI_REMAPF_LOCKED);
@@ -728,8 +665,7 @@ start:
728 need_i_mutex = 0; 665 need_i_mutex = 0;
729 } 666 }
730 667
731 xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, 668 trace_xfs_file_direct_write(xip, count, *offset, ioflags);
732 *offset, ioflags);
733 ret = generic_file_direct_write(iocb, iovp, 669 ret = generic_file_direct_write(iocb, iovp,
734 &segs, pos, offset, count, ocount); 670 &segs, pos, offset, count, ocount);
735 671
@@ -752,8 +688,7 @@ start:
752 ssize_t ret2 = 0; 688 ssize_t ret2 = 0;
753 689
754write_retry: 690write_retry:
755 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, 691 trace_xfs_file_buffered_write(xip, count, *offset, ioflags);
756 *offset, ioflags);
757 ret2 = generic_file_buffered_write(iocb, iovp, segs, 692 ret2 = generic_file_buffered_write(iocb, iovp, segs,
758 pos, offset, count, ret); 693 pos, offset, count, ret);
759 /* 694 /*
@@ -858,7 +793,7 @@ int
858xfs_bdstrat_cb(struct xfs_buf *bp) 793xfs_bdstrat_cb(struct xfs_buf *bp)
859{ 794{
860 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { 795 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
861 xfs_buftrace("XFS__BDSTRAT IOERROR", bp); 796 trace_xfs_bdstrat_shut(bp, _RET_IP_);
862 /* 797 /*
863 * Metadata write that didn't get logged but 798 * Metadata write that didn't get logged but
864 * written delayed anyway. These aren't associated 799 * written delayed anyway. These aren't associated
@@ -891,7 +826,7 @@ xfsbdstrat(
891 return; 826 return;
892 } 827 }
893 828
894 xfs_buftrace("XFSBDSTRAT IOERROR", bp); 829 trace_xfs_bdstrat_shut(bp, _RET_IP_);
895 xfs_bioerror_relse(bp); 830 xfs_bioerror_relse(bp);
896} 831}
897 832
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e6be37dbd0e9..d1f7789c7ffb 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -20,52 +20,7 @@
20 20
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_inode; 22struct xfs_inode;
23struct xfs_bmbt_irec;
24struct xfs_buf; 23struct xfs_buf;
25struct xfs_iomap;
26
27#if defined(XFS_RW_TRACE)
28/*
29 * Defines for the trace mechanisms in xfs_lrw.c.
30 */
31#define XFS_RW_KTRACE_SIZE 128
32
33#define XFS_READ_ENTER 1
34#define XFS_WRITE_ENTER 2
35#define XFS_IOMAP_READ_ENTER 3
36#define XFS_IOMAP_WRITE_ENTER 4
37#define XFS_IOMAP_READ_MAP 5
38#define XFS_IOMAP_WRITE_MAP 6
39#define XFS_IOMAP_WRITE_NOSPACE 7
40#define XFS_ITRUNC_START 8
41#define XFS_ITRUNC_FINISH1 9
42#define XFS_ITRUNC_FINISH2 10
43#define XFS_CTRUNC1 11
44#define XFS_CTRUNC2 12
45#define XFS_CTRUNC3 13
46#define XFS_CTRUNC4 14
47#define XFS_CTRUNC5 15
48#define XFS_CTRUNC6 16
49#define XFS_BUNMAP 17
50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20
53#define XFS_WRITEPAGE_ENTER 22
54#define XFS_RELEASEPAGE_ENTER 23
55#define XFS_INVALIDPAGE_ENTER 24
56#define XFS_IOMAP_ALLOC_ENTER 25
57#define XFS_IOMAP_ALLOC_MAP 26
58#define XFS_IOMAP_UNWRITTEN 27
59#define XFS_SPLICE_READ_ENTER 28
60#define XFS_SPLICE_WRITE_ENTER 29
61extern void xfs_rw_enter_trace(int, struct xfs_inode *,
62 void *, size_t, loff_t, int);
63extern void xfs_inval_cached_trace(struct xfs_inode *,
64 xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
65#else
66#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags)
67#define xfs_inval_cached_trace(ip, offset, len, first, last)
68#endif
69 24
70/* errors from xfsbdstrat() must be extracted from the buffer */ 25/* errors from xfsbdstrat() must be extracted from the buffer */
71extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 26extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1bfb0e980193..77414db10dc2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -15,6 +15,7 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
18#include "xfs.h" 19#include "xfs.h"
19#include "xfs_bit.h" 20#include "xfs_bit.h"
20#include "xfs_log.h" 21#include "xfs_log.h"
@@ -52,11 +53,11 @@
52#include "xfs_trans_priv.h" 53#include "xfs_trans_priv.h"
53#include "xfs_filestream.h" 54#include "xfs_filestream.h"
54#include "xfs_da_btree.h" 55#include "xfs_da_btree.h"
55#include "xfs_dir2_trace.h"
56#include "xfs_extfree_item.h" 56#include "xfs_extfree_item.h"
57#include "xfs_mru_cache.h" 57#include "xfs_mru_cache.h"
58#include "xfs_inode_item.h" 58#include "xfs_inode_item.h"
59#include "xfs_sync.h" 59#include "xfs_sync.h"
60#include "xfs_trace.h"
60 61
61#include <linux/namei.h> 62#include <linux/namei.h>
62#include <linux/init.h> 63#include <linux/init.h>
@@ -953,16 +954,14 @@ xfs_fs_destroy_inode(
953 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); 954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
954 955
955 /* 956 /*
956 * If we have nothing to flush with this inode then complete the 957 * We always use background reclaim here because even if the
957 * teardown now, otherwise delay the flush operation. 958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
958 */ 962 */
959 if (!xfs_inode_clean(ip)) {
960 xfs_inode_set_reclaim_tag(ip);
961 return;
962 }
963
964out_reclaim: 963out_reclaim:
965 xfs_ireclaim(ip); 964 xfs_inode_set_reclaim_tag(ip);
966} 965}
967 966
968/* 967/*
@@ -1525,8 +1524,6 @@ xfs_fs_fill_super(
1525 goto fail_vnrele; 1524 goto fail_vnrele;
1526 1525
1527 kfree(mtpt); 1526 kfree(mtpt);
1528
1529 xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
1530 return 0; 1527 return 0;
1531 1528
1532 out_filestream_unmount: 1529 out_filestream_unmount:
@@ -1602,94 +1599,6 @@ static struct file_system_type xfs_fs_type = {
1602}; 1599};
1603 1600
1604STATIC int __init 1601STATIC int __init
1605xfs_alloc_trace_bufs(void)
1606{
1607#ifdef XFS_ALLOC_TRACE
1608 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
1609 if (!xfs_alloc_trace_buf)
1610 goto out;
1611#endif
1612#ifdef XFS_BMAP_TRACE
1613 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
1614 if (!xfs_bmap_trace_buf)
1615 goto out_free_alloc_trace;
1616#endif
1617#ifdef XFS_BTREE_TRACE
1618 xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
1619 KM_MAYFAIL);
1620 if (!xfs_allocbt_trace_buf)
1621 goto out_free_bmap_trace;
1622
1623 xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
1624 if (!xfs_inobt_trace_buf)
1625 goto out_free_allocbt_trace;
1626
1627 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1628 if (!xfs_bmbt_trace_buf)
1629 goto out_free_inobt_trace;
1630#endif
1631#ifdef XFS_ATTR_TRACE
1632 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
1633 if (!xfs_attr_trace_buf)
1634 goto out_free_bmbt_trace;
1635#endif
1636#ifdef XFS_DIR2_TRACE
1637 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
1638 if (!xfs_dir2_trace_buf)
1639 goto out_free_attr_trace;
1640#endif
1641
1642 return 0;
1643
1644#ifdef XFS_DIR2_TRACE
1645 out_free_attr_trace:
1646#endif
1647#ifdef XFS_ATTR_TRACE
1648 ktrace_free(xfs_attr_trace_buf);
1649 out_free_bmbt_trace:
1650#endif
1651#ifdef XFS_BTREE_TRACE
1652 ktrace_free(xfs_bmbt_trace_buf);
1653 out_free_inobt_trace:
1654 ktrace_free(xfs_inobt_trace_buf);
1655 out_free_allocbt_trace:
1656 ktrace_free(xfs_allocbt_trace_buf);
1657 out_free_bmap_trace:
1658#endif
1659#ifdef XFS_BMAP_TRACE
1660 ktrace_free(xfs_bmap_trace_buf);
1661 out_free_alloc_trace:
1662#endif
1663#ifdef XFS_ALLOC_TRACE
1664 ktrace_free(xfs_alloc_trace_buf);
1665 out:
1666#endif
1667 return -ENOMEM;
1668}
1669
1670STATIC void
1671xfs_free_trace_bufs(void)
1672{
1673#ifdef XFS_DIR2_TRACE
1674 ktrace_free(xfs_dir2_trace_buf);
1675#endif
1676#ifdef XFS_ATTR_TRACE
1677 ktrace_free(xfs_attr_trace_buf);
1678#endif
1679#ifdef XFS_BTREE_TRACE
1680 ktrace_free(xfs_bmbt_trace_buf);
1681 ktrace_free(xfs_inobt_trace_buf);
1682 ktrace_free(xfs_allocbt_trace_buf);
1683#endif
1684#ifdef XFS_BMAP_TRACE
1685 ktrace_free(xfs_bmap_trace_buf);
1686#endif
1687#ifdef XFS_ALLOC_TRACE
1688 ktrace_free(xfs_alloc_trace_buf);
1689#endif
1690}
1691
1692STATIC int __init
1693xfs_init_zones(void) 1602xfs_init_zones(void)
1694{ 1603{
1695 1604
@@ -1830,7 +1739,6 @@ init_xfs_fs(void)
1830 printk(KERN_INFO XFS_VERSION_STRING " with " 1739 printk(KERN_INFO XFS_VERSION_STRING " with "
1831 XFS_BUILD_OPTIONS " enabled\n"); 1740 XFS_BUILD_OPTIONS " enabled\n");
1832 1741
1833 ktrace_init(64);
1834 xfs_ioend_init(); 1742 xfs_ioend_init();
1835 xfs_dir_startup(); 1743 xfs_dir_startup();
1836 1744
@@ -1838,13 +1746,9 @@ init_xfs_fs(void)
1838 if (error) 1746 if (error)
1839 goto out; 1747 goto out;
1840 1748
1841 error = xfs_alloc_trace_bufs();
1842 if (error)
1843 goto out_destroy_zones;
1844
1845 error = xfs_mru_cache_init(); 1749 error = xfs_mru_cache_init();
1846 if (error) 1750 if (error)
1847 goto out_free_trace_buffers; 1751 goto out_destroy_zones;
1848 1752
1849 error = xfs_filestream_init(); 1753 error = xfs_filestream_init();
1850 if (error) 1754 if (error)
@@ -1879,8 +1783,6 @@ init_xfs_fs(void)
1879 xfs_filestream_uninit(); 1783 xfs_filestream_uninit();
1880 out_mru_cache_uninit: 1784 out_mru_cache_uninit:
1881 xfs_mru_cache_uninit(); 1785 xfs_mru_cache_uninit();
1882 out_free_trace_buffers:
1883 xfs_free_trace_bufs();
1884 out_destroy_zones: 1786 out_destroy_zones:
1885 xfs_destroy_zones(); 1787 xfs_destroy_zones();
1886 out: 1788 out:
@@ -1897,9 +1799,7 @@ exit_xfs_fs(void)
1897 xfs_buf_terminate(); 1799 xfs_buf_terminate();
1898 xfs_filestream_uninit(); 1800 xfs_filestream_uninit();
1899 xfs_mru_cache_uninit(); 1801 xfs_mru_cache_uninit();
1900 xfs_free_trace_bufs();
1901 xfs_destroy_zones(); 1802 xfs_destroy_zones();
1902 ktrace_uninit();
1903} 1803}
1904 1804
1905module_init(init_xfs_fs); 1805module_init(init_xfs_fs);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 18175ebd58ed..233d4b9881b1 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,12 +56,6 @@ extern void xfs_qm_exit(void);
56# define XFS_BIGFS_STRING 56# define XFS_BIGFS_STRING
57#endif 57#endif
58 58
59#ifdef CONFIG_XFS_TRACE
60# define XFS_TRACE_STRING "tracing, "
61#else
62# define XFS_TRACE_STRING
63#endif
64
65#ifdef CONFIG_XFS_DMAPI 59#ifdef CONFIG_XFS_DMAPI
66# define XFS_DMAPI_STRING "dmapi support, " 60# define XFS_DMAPI_STRING "dmapi support, "
67#else 61#else
@@ -78,7 +72,6 @@ extern void xfs_qm_exit(void);
78 XFS_SECURITY_STRING \ 72 XFS_SECURITY_STRING \
79 XFS_REALTIME_STRING \ 73 XFS_REALTIME_STRING \
80 XFS_BIGFS_STRING \ 74 XFS_BIGFS_STRING \
81 XFS_TRACE_STRING \
82 XFS_DMAPI_STRING \ 75 XFS_DMAPI_STRING \
83 XFS_DBG_STRING /* DBG must be last */ 76 XFS_DBG_STRING /* DBG must be last */
84 77
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index d895a3a960f5..1f5e4bb5e970 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -44,6 +44,7 @@
44#include "xfs_inode_item.h" 44#include "xfs_inode_item.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_trace.h"
47 48
48#include <linux/kthread.h> 49#include <linux/kthread.h>
49#include <linux/freezer.h> 50#include <linux/freezer.h>
@@ -64,7 +65,6 @@ xfs_inode_ag_lookup(
64 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
65 * the number of objects requested. 66 * the number of objects requested.
66 */ 67 */
67 read_lock(&pag->pag_ici_lock);
68 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
70 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -73,7 +73,7 @@ xfs_inode_ag_lookup(
73 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
74 } 74 }
75 if (!nr_found) 75 if (!nr_found)
76 goto unlock; 76 return NULL;
77 77
78 /* 78 /*
79 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -83,13 +83,8 @@ xfs_inode_ag_lookup(
83 */ 83 */
84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
86 goto unlock; 86 return NULL;
87
88 return ip; 87 return ip;
89
90unlock:
91 read_unlock(&pag->pag_ici_lock);
92 return NULL;
93} 88}
94 89
95STATIC int 90STATIC int
@@ -99,7 +94,8 @@ xfs_inode_ag_walk(
99 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
100 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
101 int flags, 96 int flags,
102 int tag) 97 int tag,
98 int exclusive)
103{ 99{
104 struct xfs_perag *pag = &mp->m_perag[ag]; 100 struct xfs_perag *pag = &mp->m_perag[ag];
105 uint32_t first_index; 101 uint32_t first_index;
@@ -113,10 +109,20 @@ restart:
113 int error = 0; 109 int error = 0;
114 xfs_inode_t *ip; 110 xfs_inode_t *ip;
115 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
117 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
118 break; 122 break;
123 }
119 124
125 /* execute releases pag->pag_ici_lock */
120 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
121 if (error == EAGAIN) { 127 if (error == EAGAIN) {
122 skipped++; 128 skipped++;
@@ -124,9 +130,8 @@ restart:
124 } 130 }
125 if (error) 131 if (error)
126 last_error = error; 132 last_error = error;
127 /* 133
128 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
129 */
130 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
131 break; 136 break;
132 137
@@ -147,7 +152,8 @@ xfs_inode_ag_iterator(
147 int (*execute)(struct xfs_inode *ip, 152 int (*execute)(struct xfs_inode *ip,
148 struct xfs_perag *pag, int flags), 153 struct xfs_perag *pag, int flags),
149 int flags, 154 int flags,
150 int tag) 155 int tag,
156 int exclusive)
151{ 157{
152 int error = 0; 158 int error = 0;
153 int last_error = 0; 159 int last_error = 0;
@@ -156,7 +162,8 @@ xfs_inode_ag_iterator(
156 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
157 if (!mp->m_perag[ag].pag_ici_init) 163 if (!mp->m_perag[ag].pag_ici_init)
158 continue; 164 continue;
159 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
166 exclusive);
160 if (error) { 167 if (error) {
161 last_error = error; 168 last_error = error;
162 if (error == EFSCORRUPTED) 169 if (error == EFSCORRUPTED)
@@ -173,30 +180,31 @@ xfs_sync_inode_valid(
173 struct xfs_perag *pag) 180 struct xfs_perag *pag)
174{ 181{
175 struct inode *inode = VFS_I(ip); 182 struct inode *inode = VFS_I(ip);
183 int error = EFSCORRUPTED;
176 184
177 /* nothing to sync during shutdown */ 185 /* nothing to sync during shutdown */
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 read_unlock(&pag->pag_ici_lock); 187 goto out_unlock;
180 return EFSCORRUPTED;
181 }
182 188
183 /* 189 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
184 * If we can't get a reference on the inode, it must be in reclaim. 190 error = ENOENT;
185 * Leave it for the reclaim code to flush. Also avoid inodes that 191 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
186 * haven't been fully initialised. 192 goto out_unlock;
187 */
188 if (!igrab(inode)) {
189 read_unlock(&pag->pag_ici_lock);
190 return ENOENT;
191 }
192 read_unlock(&pag->pag_ici_lock);
193 193
194 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 194 /* If we can't grab the inode, it must be on its way to reclaim. */
195 if (!igrab(inode))
196 goto out_unlock;
197
198 if (is_bad_inode(inode)) {
195 IRELE(ip); 199 IRELE(ip);
196 return ENOENT; 200 goto out_unlock;
197 } 201 }
198 202
199 return 0; 203 /* inode is valid */
204 error = 0;
205out_unlock:
206 read_unlock(&pag->pag_ici_lock);
207 return error;
200} 208}
201 209
202STATIC int 210STATIC int
@@ -281,7 +289,7 @@ xfs_sync_data(
281 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 289 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
282 290
283 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 291 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
284 XFS_ICI_NO_TAG); 292 XFS_ICI_NO_TAG, 0);
285 if (error) 293 if (error)
286 return XFS_ERROR(error); 294 return XFS_ERROR(error);
287 295
@@ -303,7 +311,7 @@ xfs_sync_attr(
303 ASSERT((flags & ~SYNC_WAIT) == 0); 311 ASSERT((flags & ~SYNC_WAIT) == 0);
304 312
305 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
306 XFS_ICI_NO_TAG); 314 XFS_ICI_NO_TAG, 0);
307} 315}
308 316
309STATIC int 317STATIC int
@@ -663,60 +671,6 @@ xfs_syncd_stop(
663 kthread_stop(mp->m_sync_task); 671 kthread_stop(mp->m_sync_task);
664} 672}
665 673
666STATIC int
667xfs_reclaim_inode(
668 xfs_inode_t *ip,
669 int sync_mode)
670{
671 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
672
673 /* The hash lock here protects a thread in xfs_iget_core from
674 * racing with us on linking the inode back with a vnode.
675 * Once we have the XFS_IRECLAIM flag set it will not touch
676 * us.
677 */
678 write_lock(&pag->pag_ici_lock);
679 spin_lock(&ip->i_flags_lock);
680 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
681 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
682 spin_unlock(&ip->i_flags_lock);
683 write_unlock(&pag->pag_ici_lock);
684 return -EAGAIN;
685 }
686 __xfs_iflags_set(ip, XFS_IRECLAIM);
687 spin_unlock(&ip->i_flags_lock);
688 write_unlock(&pag->pag_ici_lock);
689 xfs_put_perag(ip->i_mount, pag);
690
691 /*
692 * If the inode is still dirty, then flush it out. If the inode
693 * is not in the AIL, then it will be OK to flush it delwri as
694 * long as xfs_iflush() does not keep any references to the inode.
695 * We leave that decision up to xfs_iflush() since it has the
696 * knowledge of whether it's OK to simply do a delwri flush of
697 * the inode or whether we need to wait until the inode is
698 * pulled from the AIL.
699 * We get the flush lock regardless, though, just to make sure
700 * we don't free it while it is being flushed.
701 */
702 xfs_ilock(ip, XFS_ILOCK_EXCL);
703 xfs_iflock(ip);
704
705 /*
706 * In the case of a forced shutdown we rely on xfs_iflush() to
707 * wait for the inode to be unpinned before returning an error.
708 */
709 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
710 /* synchronize with xfs_iflush_done */
711 xfs_iflock(ip);
712 xfs_ifunlock(ip);
713 }
714
715 xfs_iunlock(ip, XFS_ILOCK_EXCL);
716 xfs_ireclaim(ip);
717 return 0;
718}
719
720void 674void
721__xfs_inode_set_reclaim_tag( 675__xfs_inode_set_reclaim_tag(
722 struct xfs_perag *pag, 676 struct xfs_perag *pag,
@@ -759,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
759} 713}
760 714
761STATIC int 715STATIC int
762xfs_reclaim_inode_now( 716xfs_reclaim_inode(
763 struct xfs_inode *ip, 717 struct xfs_inode *ip,
764 struct xfs_perag *pag, 718 struct xfs_perag *pag,
765 int flags) 719 int sync_mode)
766{ 720{
767 /* ignore if already under reclaim */ 721 /*
768 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 722 * The radix tree lock here protects a thread in xfs_iget from racing
769 read_unlock(&pag->pag_ici_lock); 723 * with us starting reclaim on the inode. Once we have the
724 * XFS_IRECLAIM flag set it will not touch us.
725 */
726 spin_lock(&ip->i_flags_lock);
727 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
728 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
729 /* ignore as it is already under reclaim */
730 spin_unlock(&ip->i_flags_lock);
731 write_unlock(&pag->pag_ici_lock);
770 return 0; 732 return 0;
771 } 733 }
772 read_unlock(&pag->pag_ici_lock); 734 __xfs_iflags_set(ip, XFS_IRECLAIM);
735 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock);
773 737
774 return xfs_reclaim_inode(ip, flags); 738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip);
751
752 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to
754 * wait for the inode to be unpinned before returning an error.
755 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
757 /* synchronize with xfs_iflush_done */
758 xfs_iflock(ip);
759 xfs_ifunlock(ip);
760 }
761
762 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip);
764 return 0;
775} 765}
776 766
777int 767int
@@ -779,6 +769,6 @@ xfs_reclaim_inodes(
779 xfs_mount_t *mp, 769 xfs_mount_t *mp,
780 int mode) 770 int mode)
781{ 771{
782 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 772 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
783 XFS_ICI_RECLAIM_TAG); 773 XFS_ICI_RECLAIM_TAG, 1);
784} 774}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index a500b4d91835..ea932b43335d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
55int xfs_inode_ag_iterator(struct xfs_mount *mp, 55int xfs_inode_ag_iterator(struct xfs_mount *mp,
56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
57 int flags, int tag); 57 int flags, int tag, int write_lock);
58 58
59#endif 59#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
new file mode 100644
index 000000000000..856eb3c8d605
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -0,0 +1,75 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h"
35#include "xfs_inode.h"
36#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h"
39#include "xfs_ialloc.h"
40#include "xfs_itable.h"
41#include "xfs_alloc.h"
42#include "xfs_bmap.h"
43#include "xfs_attr.h"
44#include "xfs_attr_sf.h"
45#include "xfs_attr_leaf.h"
46#include "xfs_log_priv.h"
47#include "xfs_buf_item.h"
48#include "xfs_quota.h"
49#include "xfs_iomap.h"
50#include "xfs_aops.h"
51#include "quota/xfs_dquot_item.h"
52#include "quota/xfs_dquot.h"
53
54/*
55 * Format fsblock number into a static buffer & return it.
56 */
57STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno)
58{
59 static char rval[50];
60
61 if (bno == NULLFSBLOCK)
62 sprintf(rval, "NULLFSBLOCK");
63 else if (isnullstartblock(bno))
64 sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno));
65 else
66 sprintf(rval, "%lld", (xfs_dfsbno_t)bno);
67 return rval;
68}
69
70/*
71 * We include this last to have the helpers above available for the trace
72 * event implementations.
73 */
74#define CREATE_TRACE_POINTS
75#include "xfs_trace.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
new file mode 100644
index 000000000000..c22a608321a3
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -0,0 +1,1422 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM xfs
20
21#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
22#define _TRACE_XFS_H
23
24#include <linux/tracepoint.h>
25
26struct xfs_agf;
27struct xfs_alloc_arg;
28struct xfs_attr_list_context;
29struct xfs_buf_log_item;
30struct xfs_da_args;
31struct xfs_da_node_entry;
32struct xfs_dquot;
33struct xlog_ticket;
34struct log;
35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
/*
 * Shorthand for defining one event of xfs_attr_list_class with the class's
 * single-argument (ctx) prototype, followed by the events that use it.
 */
81#define DEFINE_ATTR_LIST_EVENT(name) \
82DEFINE_EVENT(xfs_attr_list_class, name, \
83 TP_PROTO(struct xfs_attr_list_context *ctx), \
84 TP_ARGS(ctx))
85DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
86DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
87DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
88DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
89DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
90DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
91DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
92DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
93
94TRACE_EVENT(xfs_attr_list_node_descend,
95 TP_PROTO(struct xfs_attr_list_context *ctx,
96 struct xfs_da_node_entry *btree),
97 TP_ARGS(ctx, btree),
98 TP_STRUCT__entry(
99 __field(dev_t, dev)
100 __field(xfs_ino_t, ino)
101 __field(u32, hashval)
102 __field(u32, blkno)
103 __field(u32, offset)
104 __field(void *, alist)
105 __field(int, bufsize)
106 __field(int, count)
107 __field(int, firstu)
108 __field(int, dupcnt)
109 __field(int, flags)
110 __field(u32, bt_hashval)
111 __field(u32, bt_before)
112 ),
113 TP_fast_assign(
114 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
115 __entry->ino = ctx->dp->i_ino;
116 __entry->hashval = ctx->cursor->hashval;
117 __entry->blkno = ctx->cursor->blkno;
118 __entry->offset = ctx->cursor->offset;
119 __entry->alist = ctx->alist;
120 __entry->bufsize = ctx->bufsize;
121 __entry->count = ctx->count;
122 __entry->firstu = ctx->firstu;
123 __entry->flags = ctx->flags;
124 __entry->bt_hashval = be32_to_cpu(btree->hashval);
125 __entry->bt_before = be32_to_cpu(btree->before);
126 ),
127 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
128 "alist 0x%p size %u count %u firstu %u flags %d %s "
129 "node hashval %u, node before %u",
130 MAJOR(__entry->dev), MINOR(__entry->dev),
131 __entry->ino,
132 __entry->hashval,
133 __entry->blkno,
134 __entry->offset,
135 __entry->dupcnt,
136 __entry->alist,
137 __entry->bufsize,
138 __entry->count,
139 __entry->firstu,
140 __entry->flags,
141 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
142 __entry->bt_hashval,
143 __entry->bt_before)
144);
145
146TRACE_EVENT(xfs_iext_insert,
147 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
148 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
149 TP_ARGS(ip, idx, r, state, caller_ip),
150 TP_STRUCT__entry(
151 __field(dev_t, dev)
152 __field(xfs_ino_t, ino)
153 __field(xfs_extnum_t, idx)
154 __field(xfs_fileoff_t, startoff)
155 __field(xfs_fsblock_t, startblock)
156 __field(xfs_filblks_t, blockcount)
157 __field(xfs_exntst_t, state)
158 __field(int, bmap_state)
159 __field(unsigned long, caller_ip)
160 ),
161 TP_fast_assign(
162 __entry->dev = VFS_I(ip)->i_sb->s_dev;
163 __entry->ino = ip->i_ino;
164 __entry->idx = idx;
165 __entry->startoff = r->br_startoff;
166 __entry->startblock = r->br_startblock;
167 __entry->blockcount = r->br_blockcount;
168 __entry->state = r->br_state;
169 __entry->bmap_state = state;
170 __entry->caller_ip = caller_ip;
171 ),
172 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
173 "offset %lld block %s count %lld flag %d caller %pf",
174 MAJOR(__entry->dev), MINOR(__entry->dev),
175 __entry->ino,
176 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
177 (long)__entry->idx,
178 __entry->startoff,
179 xfs_fmtfsblock(__entry->startblock),
180 __entry->blockcount,
181 __entry->state,
182 (char *)__entry->caller_ip)
183);
184
185DECLARE_EVENT_CLASS(xfs_bmap_class,
186 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
187 unsigned long caller_ip),
188 TP_ARGS(ip, idx, state, caller_ip),
189 TP_STRUCT__entry(
190 __field(dev_t, dev)
191 __field(xfs_ino_t, ino)
192 __field(xfs_extnum_t, idx)
193 __field(xfs_fileoff_t, startoff)
194 __field(xfs_fsblock_t, startblock)
195 __field(xfs_filblks_t, blockcount)
196 __field(xfs_exntst_t, state)
197 __field(int, bmap_state)
198 __field(unsigned long, caller_ip)
199 ),
200 TP_fast_assign(
201 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
202 ip->i_afp : &ip->i_df;
203 struct xfs_bmbt_irec r;
204
205 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
206 __entry->dev = VFS_I(ip)->i_sb->s_dev;
207 __entry->ino = ip->i_ino;
208 __entry->idx = idx;
209 __entry->startoff = r.br_startoff;
210 __entry->startblock = r.br_startblock;
211 __entry->blockcount = r.br_blockcount;
212 __entry->state = r.br_state;
213 __entry->bmap_state = state;
214 __entry->caller_ip = caller_ip;
215 ),
216 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
217 "offset %lld block %s count %lld flag %d caller %pf",
218 MAJOR(__entry->dev), MINOR(__entry->dev),
219 __entry->ino,
220 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
221 (long)__entry->idx,
222 __entry->startoff,
223 xfs_fmtfsblock(__entry->startblock),
224 __entry->blockcount,
225 __entry->state,
226 (char *)__entry->caller_ip)
227)
228
/*
 * Shorthand for defining one event of xfs_bmap_class with the class's
 * (ip, idx, state, caller_ip) prototype, followed by the events that use it.
 */
229#define DEFINE_BMAP_EVENT(name) \
230DEFINE_EVENT(xfs_bmap_class, name, \
231 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
232 unsigned long caller_ip), \
233 TP_ARGS(ip, idx, state, caller_ip))
234DEFINE_BMAP_EVENT(xfs_iext_remove);
235DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
236DEFINE_BMAP_EVENT(xfs_bmap_post_update);
237DEFINE_BMAP_EVENT(xfs_extlist);
238
/*
 * Event class for tracepoints that snapshot a struct xfs_buf.
 *
 * Records the buffer target's device, the block number, the buffer length,
 * the current hold and pin counts (read with atomic_read), the value
 * returned by xfs_buf_lock_value(), the buffer's own b_flags (decoded
 * through XFS_BUF_FLAGS when printed) and the caller address.
 */
239DECLARE_EVENT_CLASS(xfs_buf_class,
240 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
241 TP_ARGS(bp, caller_ip),
242 TP_STRUCT__entry(
243 __field(dev_t, dev)
244 __field(xfs_daddr_t, bno)
245 __field(size_t, buffer_length)
246 __field(int, hold)
247 __field(int, pincount)
248 __field(unsigned, lockval)
249 __field(unsigned, flags)
250 __field(unsigned long, caller_ip)
251 ),
252 TP_fast_assign(
253 __entry->dev = bp->b_target->bt_dev;
254 __entry->bno = bp->b_bn;
255 __entry->buffer_length = bp->b_buffer_length;
256 __entry->hold = atomic_read(&bp->b_hold);
257 __entry->pincount = atomic_read(&bp->b_pin_count);
258 __entry->lockval = xfs_buf_lock_value(bp);
259 __entry->flags = bp->b_flags;
260 __entry->caller_ip = caller_ip;
261 ),
262 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
263 "lock %d flags %s caller %pf",
264 MAJOR(__entry->dev), MINOR(__entry->dev),
265 (unsigned long long)__entry->bno,
266 __entry->buffer_length,
267 __entry->hold,
268 __entry->pincount,
269 __entry->lockval,
270 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
271 (void *)__entry->caller_ip)
272)
273
/*
 * Shorthand for defining one event of xfs_buf_class with the class's
 * (bp, caller_ip) prototype, followed by the events that use it.
 */
274#define DEFINE_BUF_EVENT(name) \
275DEFINE_EVENT(xfs_buf_class, name, \
276 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
277 TP_ARGS(bp, caller_ip))
278DEFINE_BUF_EVENT(xfs_buf_init);
279DEFINE_BUF_EVENT(xfs_buf_free);
280DEFINE_BUF_EVENT(xfs_buf_hold);
281DEFINE_BUF_EVENT(xfs_buf_rele);
282DEFINE_BUF_EVENT(xfs_buf_pin);
283DEFINE_BUF_EVENT(xfs_buf_unpin);
284DEFINE_BUF_EVENT(xfs_buf_iodone);
285DEFINE_BUF_EVENT(xfs_buf_iorequest);
286DEFINE_BUF_EVENT(xfs_buf_bawrite);
287DEFINE_BUF_EVENT(xfs_buf_bdwrite);
288DEFINE_BUF_EVENT(xfs_buf_lock);
289DEFINE_BUF_EVENT(xfs_buf_lock_done);
290DEFINE_BUF_EVENT(xfs_buf_cond_lock);
291DEFINE_BUF_EVENT(xfs_buf_unlock);
292DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
293DEFINE_BUF_EVENT(xfs_buf_iowait);
294DEFINE_BUF_EVENT(xfs_buf_iowait_done);
295DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
296DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
297DEFINE_BUF_EVENT(xfs_buf_delwri_split);
298DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
299DEFINE_BUF_EVENT(xfs_bdstrat_shut);
300DEFINE_BUF_EVENT(xfs_buf_item_relse);
301DEFINE_BUF_EVENT(xfs_buf_item_iodone);
302DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
303DEFINE_BUF_EVENT(xfs_buf_error_relse);
304DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
305DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
306
307/* not really buffer traces, but the buf provides useful information */
308DEFINE_BUF_EVENT(xfs_btree_corrupt);
309DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
310DEFINE_BUF_EVENT(xfs_reset_dqcounts);
311DEFINE_BUF_EVENT(xfs_inode_item_push);
312
/*
 * Event class for buffer tracepoints whose flags come from the caller.
 *
 * Records the same buffer fields as xfs_buf_class (device, block number,
 * length, hold count, pin count, lock value, caller address), but the
 * flags field is filled from the flags argument instead of bp->b_flags.
 */
313/* pass flags explicitly */
314DECLARE_EVENT_CLASS(xfs_buf_flags_class,
315 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
316 TP_ARGS(bp, flags, caller_ip),
317 TP_STRUCT__entry(
318 __field(dev_t, dev)
319 __field(xfs_daddr_t, bno)
320 __field(size_t, buffer_length)
321 __field(int, hold)
322 __field(int, pincount)
323 __field(unsigned, lockval)
324 __field(unsigned, flags)
325 __field(unsigned long, caller_ip)
326 ),
327 TP_fast_assign(
328 __entry->dev = bp->b_target->bt_dev;
329 __entry->bno = bp->b_bn;
330 __entry->buffer_length = bp->b_buffer_length;
331 __entry->flags = flags;
332 __entry->hold = atomic_read(&bp->b_hold);
333 __entry->pincount = atomic_read(&bp->b_pin_count);
334 __entry->lockval = xfs_buf_lock_value(bp);
335 __entry->caller_ip = caller_ip;
336 ),
337 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
338 "lock %d flags %s caller %pf",
339 MAJOR(__entry->dev), MINOR(__entry->dev),
340 (unsigned long long)__entry->bno,
341 __entry->buffer_length,
342 __entry->hold,
343 __entry->pincount,
344 __entry->lockval,
345 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
346 (void *)__entry->caller_ip)
347)
348
/*
 * Shorthand for defining one event of xfs_buf_flags_class with the class's
 * (bp, flags, caller_ip) prototype, followed by the events that use it.
 */
349#define DEFINE_BUF_FLAGS_EVENT(name) \
350DEFINE_EVENT(xfs_buf_flags_class, name, \
351 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
352 TP_ARGS(bp, flags, caller_ip))
353DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
354DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
355DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
356
/*
 * Standalone event for an error being set on a buffer.
 *
 * Records the same buffer snapshot as xfs_buf_class (device, block number,
 * length, hold count, pin count, lock value, bp->b_flags, caller address)
 * plus the error value passed in by the caller.
 */
357TRACE_EVENT(xfs_buf_ioerror,
358 TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
359 TP_ARGS(bp, error, caller_ip),
360 TP_STRUCT__entry(
361 __field(dev_t, dev)
362 __field(xfs_daddr_t, bno)
363 __field(size_t, buffer_length)
364 __field(unsigned, flags)
365 __field(int, hold)
366 __field(int, pincount)
367 __field(unsigned, lockval)
368 __field(int, error)
369 __field(unsigned long, caller_ip)
370 ),
371 TP_fast_assign(
372 __entry->dev = bp->b_target->bt_dev;
373 __entry->bno = bp->b_bn;
374 __entry->buffer_length = bp->b_buffer_length;
375 __entry->hold = atomic_read(&bp->b_hold);
376 __entry->pincount = atomic_read(&bp->b_pin_count);
377 __entry->lockval = xfs_buf_lock_value(bp);
378 __entry->error = error;
379 __entry->flags = bp->b_flags;
380 __entry->caller_ip = caller_ip;
381 ),
382 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
383 "lock %d error %d flags %s caller %pf",
384 MAJOR(__entry->dev), MINOR(__entry->dev),
385 (unsigned long long)__entry->bno,
386 __entry->buffer_length,
387 __entry->hold,
388 __entry->pincount,
389 __entry->lockval,
390 __entry->error,
391 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
392 (void *)__entry->caller_ip)
393);
394
/*
 * Event class for tracepoints that snapshot a buffer log item.
 *
 * From the log item itself it records bli_flags, bli_recur, bli_refcount
 * and the embedded bli_item's li_desc and li_flags.  From the buffer the
 * item points at (bip->bli_buf) it records the device, block number,
 * length, flags, hold count, pin count and lock value.  The three flag
 * words are decoded through XFS_BUF_FLAGS, XFS_BLI_FLAGS and XFS_LI_FLAGS
 * when printed.
 */
395DECLARE_EVENT_CLASS(xfs_buf_item_class,
396 TP_PROTO(struct xfs_buf_log_item *bip),
397 TP_ARGS(bip),
398 TP_STRUCT__entry(
399 __field(dev_t, dev)
400 __field(xfs_daddr_t, buf_bno)
401 __field(size_t, buf_len)
402 __field(int, buf_hold)
403 __field(int, buf_pincount)
404 __field(int, buf_lockval)
405 __field(unsigned, buf_flags)
406 __field(unsigned, bli_recur)
407 __field(int, bli_refcount)
408 __field(unsigned, bli_flags)
409 __field(void *, li_desc)
410 __field(unsigned, li_flags)
411 ),
412 TP_fast_assign(
413 __entry->dev = bip->bli_buf->b_target->bt_dev;
414 __entry->bli_flags = bip->bli_flags;
415 __entry->bli_recur = bip->bli_recur;
416 __entry->bli_refcount = atomic_read(&bip->bli_refcount);
417 __entry->buf_bno = bip->bli_buf->b_bn;
418 __entry->buf_len = bip->bli_buf->b_buffer_length;
419 __entry->buf_flags = bip->bli_buf->b_flags;
420 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
421 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
422 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
423 __entry->li_desc = bip->bli_item.li_desc;
424 __entry->li_flags = bip->bli_item.li_flags;
425 ),
426 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
427 "lock %d flags %s recur %d refcount %d bliflags %s "
428 "lidesc 0x%p liflags %s",
429 MAJOR(__entry->dev), MINOR(__entry->dev),
430 (unsigned long long)__entry->buf_bno,
431 __entry->buf_len,
432 __entry->buf_hold,
433 __entry->buf_pincount,
434 __entry->buf_lockval,
435 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
436 __entry->bli_recur,
437 __entry->bli_refcount,
438 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
439 __entry->li_desc,
440 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
441)
442
/*
 * Shorthand for defining one event of xfs_buf_item_class with the class's
 * single-argument (bip) prototype, followed by the events that use it.
 */
443#define DEFINE_BUF_ITEM_EVENT(name) \
444DEFINE_EVENT(xfs_buf_item_class, name, \
445 TP_PROTO(struct xfs_buf_log_item *bip), \
446 TP_ARGS(bip))
447DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
448DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
449DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
450DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
451DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
452DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
453DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
454DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
455DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
456DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
457DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
458DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
459DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
460DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
461DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
462DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
463DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
464DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
465DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
466DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
467DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
468DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
469DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
470DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
471
472DECLARE_EVENT_CLASS(xfs_lock_class,
473 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
474 unsigned long caller_ip),
475 TP_ARGS(ip, lock_flags, caller_ip),
476 TP_STRUCT__entry(
477 __field(dev_t, dev)
478 __field(xfs_ino_t, ino)
479 __field(int, lock_flags)
480 __field(unsigned long, caller_ip)
481 ),
482 TP_fast_assign(
483 __entry->dev = VFS_I(ip)->i_sb->s_dev;
484 __entry->ino = ip->i_ino;
485 __entry->lock_flags = lock_flags;
486 __entry->caller_ip = caller_ip;
487 ),
488 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
489 MAJOR(__entry->dev), MINOR(__entry->dev),
490 __entry->ino,
491 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
492 (void *)__entry->caller_ip)
493)
494
495#define DEFINE_LOCK_EVENT(name) \
496DEFINE_EVENT(xfs_lock_class, name, \
497 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
498 unsigned long caller_ip), \
499 TP_ARGS(ip, lock_flags, caller_ip))
500DEFINE_LOCK_EVENT(xfs_ilock);
501DEFINE_LOCK_EVENT(xfs_ilock_nowait);
502DEFINE_LOCK_EVENT(xfs_ilock_demote);
503DEFINE_LOCK_EVENT(xfs_iunlock);
504
505DECLARE_EVENT_CLASS(xfs_iget_class,
506 TP_PROTO(struct xfs_inode *ip),
507 TP_ARGS(ip),
508 TP_STRUCT__entry(
509 __field(dev_t, dev)
510 __field(xfs_ino_t, ino)
511 ),
512 TP_fast_assign(
513 __entry->dev = VFS_I(ip)->i_sb->s_dev;
514 __entry->ino = ip->i_ino;
515 ),
516 TP_printk("dev %d:%d ino 0x%llx",
517 MAJOR(__entry->dev), MINOR(__entry->dev),
518 __entry->ino)
519)
520
521#define DEFINE_IGET_EVENT(name) \
522DEFINE_EVENT(xfs_iget_class, name, \
523 TP_PROTO(struct xfs_inode *ip), \
524 TP_ARGS(ip))
525DEFINE_IGET_EVENT(xfs_iget_skip);
526DEFINE_IGET_EVENT(xfs_iget_reclaim);
527DEFINE_IGET_EVENT(xfs_iget_found);
528DEFINE_IGET_EVENT(xfs_iget_alloc);
529
530DECLARE_EVENT_CLASS(xfs_inode_class, /* inode events: dev, ino, VFS i_count and the caller's address */
531 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
532 TP_ARGS(ip, caller_ip),
533 TP_STRUCT__entry(
534 __field(dev_t, dev)
535 __field(xfs_ino_t, ino)
536 __field(int, count)
537 __field(unsigned long, caller_ip)
538 ),
539 TP_fast_assign(
540 __entry->dev = VFS_I(ip)->i_sb->s_dev;
541 __entry->ino = ip->i_ino;
542 __entry->count = atomic_read(&VFS_I(ip)->i_count); /* snapshot of the VFS inode's i_count */
543 __entry->caller_ip = caller_ip;
544 ),
545 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
546 MAJOR(__entry->dev), MINOR(__entry->dev),
547 __entry->ino,
548 __entry->count,
549 (void *)__entry->caller_ip) /* same cast as the other caller_ip events in this file */
550)
551
552#define DEFINE_INODE_EVENT(name) \
553DEFINE_EVENT(xfs_inode_class, name, \
554 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
555 TP_ARGS(ip, caller_ip))
556DEFINE_INODE_EVENT(xfs_ihold);
557DEFINE_INODE_EVENT(xfs_irele);
558/* the old xfs_itrace_entry tracer - to be replaced by something in the VFS */
559DEFINE_INODE_EVENT(xfs_inode);
560#define xfs_itrace_entry(ip) \
561 trace_xfs_inode(ip, _THIS_IP_)
562
563DECLARE_EVENT_CLASS(xfs_dquot_class, /* dquot events: id, flags, refcount, usage counters and limits */
564 TP_PROTO(struct xfs_dquot *dqp),
565 TP_ARGS(dqp),
566 TP_STRUCT__entry(
567 __field(dev_t, dev)
568 __field(__u32, id) /* stored CPU-endian; converted once in TP_fast_assign */
569 __field(unsigned, flags)
570 __field(unsigned, nrefs)
571 __field(unsigned long long, res_bcount)
572 __field(unsigned long long, bcount)
573 __field(unsigned long long, icount)
574 __field(unsigned long long, blk_hardlimit)
575 __field(unsigned long long, blk_softlimit)
576 __field(unsigned long long, ino_hardlimit)
577 __field(unsigned long long, ino_softlimit)
578 ),
579 TP_fast_assign(
580 __entry->dev = dqp->q_mount->m_super->s_dev;
581 __entry->id = be32_to_cpu(dqp->q_core.d_id); /* convert here so TP_printk needs no byte-swap helper */
582 __entry->flags = dqp->dq_flags;
583 __entry->nrefs = dqp->q_nrefs;
584 __entry->res_bcount = dqp->q_res_bcount;
585 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
586 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
587 __entry->blk_hardlimit =
588 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
589 __entry->blk_softlimit =
590 be64_to_cpu(dqp->q_core.d_blk_softlimit);
591 __entry->ino_hardlimit =
592 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
593 __entry->ino_softlimit =
594 be64_to_cpu(dqp->q_core.d_ino_softlimit);
595 ),
596 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
597 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
598 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
599 MAJOR(__entry->dev), MINOR(__entry->dev),
600 __entry->id,
601 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
602 __entry->nrefs,
603 __entry->res_bcount,
604 __entry->bcount,
605 __entry->blk_hardlimit,
606 __entry->blk_softlimit,
607 __entry->icount,
608 __entry->ino_hardlimit,
609 __entry->ino_softlimit)
610)
611
612#define DEFINE_DQUOT_EVENT(name) \
613DEFINE_EVENT(xfs_dquot_class, name, \
614 TP_PROTO(struct xfs_dquot *dqp), \
615 TP_ARGS(dqp))
616DEFINE_DQUOT_EVENT(xfs_dqadjust);
617DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
618DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
619DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
620DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
621DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
622DEFINE_DQUOT_EVENT(xfs_dqattach_found);
623DEFINE_DQUOT_EVENT(xfs_dqattach_get);
624DEFINE_DQUOT_EVENT(xfs_dqinit);
625DEFINE_DQUOT_EVENT(xfs_dqreuse);
626DEFINE_DQUOT_EVENT(xfs_dqalloc);
627DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
628DEFINE_DQUOT_EVENT(xfs_dqread);
629DEFINE_DQUOT_EVENT(xfs_dqread_fail);
630DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
631DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
632DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
633DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
634DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
635DEFINE_DQUOT_EVENT(xfs_dqget_hit);
636DEFINE_DQUOT_EVENT(xfs_dqget_miss);
637DEFINE_DQUOT_EVENT(xfs_dqput);
638DEFINE_DQUOT_EVENT(xfs_dqput_wait);
639DEFINE_DQUOT_EVENT(xfs_dqput_free);
640DEFINE_DQUOT_EVENT(xfs_dqrele);
641DEFINE_DQUOT_EVENT(xfs_dqflush);
642DEFINE_DQUOT_EVENT(xfs_dqflush_force);
643DEFINE_DQUOT_EVENT(xfs_dqflush_done);
644/* not really iget events, but we re-use the format */
645DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
646DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
647
648DECLARE_EVENT_CLASS(xfs_loggrant_class,
649 TP_PROTO(struct log *log, struct xlog_ticket *tic),
650 TP_ARGS(log, tic),
651 TP_STRUCT__entry(
652 __field(dev_t, dev)
653 __field(unsigned, trans_type)
654 __field(char, ocnt)
655 __field(char, cnt)
656 __field(int, curr_res)
657 __field(int, unit_res)
658 __field(unsigned int, flags)
659 __field(void *, reserve_headq)
660 __field(void *, write_headq)
661 __field(int, grant_reserve_cycle)
662 __field(int, grant_reserve_bytes)
663 __field(int, grant_write_cycle)
664 __field(int, grant_write_bytes)
665 __field(int, curr_cycle)
666 __field(int, curr_block)
667 __field(xfs_lsn_t, tail_lsn)
668 ),
669 TP_fast_assign(
670 __entry->dev = log->l_mp->m_super->s_dev;
671 __entry->trans_type = tic->t_trans_type;
672 __entry->ocnt = tic->t_ocnt;
673 __entry->cnt = tic->t_cnt;
674 __entry->curr_res = tic->t_curr_res;
675 __entry->unit_res = tic->t_unit_res;
676 __entry->flags = tic->t_flags;
677 __entry->reserve_headq = log->l_reserve_headq;
678 __entry->write_headq = log->l_write_headq;
679 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
680 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
681 __entry->grant_write_cycle = log->l_grant_write_cycle;
682 __entry->grant_write_bytes = log->l_grant_write_bytes;
683 __entry->curr_cycle = log->l_curr_cycle;
684 __entry->curr_block = log->l_curr_block;
685 __entry->tail_lsn = log->l_tail_lsn;
686 ),
687 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
688 "t_unit_res %u t_flags %s reserve_headq 0x%p "
689 "write_headq 0x%p grant_reserve_cycle %d "
690 "grant_reserve_bytes %d grant_write_cycle %d "
691 "grant_write_bytes %d curr_cycle %d curr_block %d "
692 "tail_cycle %d tail_block %d",
693 MAJOR(__entry->dev), MINOR(__entry->dev),
694 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
695 __entry->ocnt,
696 __entry->cnt,
697 __entry->curr_res,
698 __entry->unit_res,
699 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
700 __entry->reserve_headq,
701 __entry->write_headq,
702 __entry->grant_reserve_cycle,
703 __entry->grant_reserve_bytes,
704 __entry->grant_write_cycle,
705 __entry->grant_write_bytes,
706 __entry->curr_cycle,
707 __entry->curr_block,
708 CYCLE_LSN(__entry->tail_lsn),
709 BLOCK_LSN(__entry->tail_lsn)
710 )
711)
712
713#define DEFINE_LOGGRANT_EVENT(name) \
714DEFINE_EVENT(xfs_loggrant_class, name, \
715 TP_PROTO(struct log *log, struct xlog_ticket *tic), \
716 TP_ARGS(log, tic))
717DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
718DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
719DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
720DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
721DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
722DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
723DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
724DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
725DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
726DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
727DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
728DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
729DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
730DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
731DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
732DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
733DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
734DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
735DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
736DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
737DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
738DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
739DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
740DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
741
742#define DEFINE_RW_EVENT(name) \
743TRACE_EVENT(name, \
744 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
745 TP_ARGS(ip, count, offset, flags), \
746 TP_STRUCT__entry( \
747 __field(dev_t, dev) \
748 __field(xfs_ino_t, ino) \
749 __field(xfs_fsize_t, size) \
750 __field(xfs_fsize_t, new_size) \
751 __field(loff_t, offset) \
752 __field(size_t, count) \
753 __field(int, flags) \
754 ), \
755 TP_fast_assign( \
756 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
757 __entry->ino = ip->i_ino; \
758 __entry->size = ip->i_d.di_size; \
759 __entry->new_size = ip->i_new_size; \
760 __entry->offset = offset; \
761 __entry->count = count; \
762 __entry->flags = flags; \
763 ), \
764 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
765 "offset 0x%llx count 0x%zx ioflags %s", \
766 MAJOR(__entry->dev), MINOR(__entry->dev), \
767 __entry->ino, \
768 __entry->size, \
769 __entry->new_size, \
770 __entry->offset, \
771 __entry->count, \
772 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
773)
774DEFINE_RW_EVENT(xfs_file_read);
775DEFINE_RW_EVENT(xfs_file_buffered_write);
776DEFINE_RW_EVENT(xfs_file_direct_write);
777DEFINE_RW_EVENT(xfs_file_splice_read);
778DEFINE_RW_EVENT(xfs_file_splice_write);
779
780
781#define DEFINE_PAGE_EVENT(name) \
782TRACE_EVENT(name, \
783 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
784 TP_ARGS(inode, page, off), \
785 TP_STRUCT__entry( \
786 __field(dev_t, dev) \
787 __field(xfs_ino_t, ino) \
788 __field(pgoff_t, pgoff) \
789 __field(loff_t, size) \
790 __field(unsigned long, offset) \
791 __field(int, delalloc) \
792 __field(int, unmapped) \
793 __field(int, unwritten) \
794 ), \
795 TP_fast_assign( \
796 int delalloc = -1, unmapped = -1, unwritten = -1; \
797 \
798 if (page_has_buffers(page)) \
799 xfs_count_page_state(page, &delalloc, \
800 &unmapped, &unwritten); \
801 __entry->dev = inode->i_sb->s_dev; \
802 __entry->ino = XFS_I(inode)->i_ino; \
803 __entry->pgoff = page_offset(page); \
804 __entry->size = i_size_read(inode); \
805 __entry->offset = off; \
806 __entry->delalloc = delalloc; \
807 __entry->unmapped = unmapped; \
808 __entry->unwritten = unwritten; \
809 ), \
810 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
811 "delalloc %d unmapped %d unwritten %d", \
812 MAJOR(__entry->dev), MINOR(__entry->dev), \
813 __entry->ino, \
814 __entry->pgoff, \
815 __entry->size, \
816 __entry->offset, \
817 __entry->delalloc, \
818 __entry->unmapped, \
819 __entry->unwritten) \
820)
821DEFINE_PAGE_EVENT(xfs_writepage);
822DEFINE_PAGE_EVENT(xfs_releasepage);
823DEFINE_PAGE_EVENT(xfs_invalidatepage);
824
825#define DEFINE_IOMAP_EVENT(name) \
826TRACE_EVENT(name, \
827 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
828 int flags, struct xfs_bmbt_irec *irec), \
829 TP_ARGS(ip, offset, count, flags, irec), \
830 TP_STRUCT__entry( \
831 __field(dev_t, dev) \
832 __field(xfs_ino_t, ino) \
833 __field(loff_t, size) \
834 __field(loff_t, new_size) \
835 __field(loff_t, offset) \
836 __field(size_t, count) \
837 __field(int, flags) \
838 __field(xfs_fileoff_t, startoff) \
839 __field(xfs_fsblock_t, startblock) \
840 __field(xfs_filblks_t, blockcount) \
841 ), \
842 TP_fast_assign( \
843 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
844 __entry->ino = ip->i_ino; \
845 __entry->size = ip->i_d.di_size; \
846 __entry->new_size = ip->i_new_size; \
847 __entry->offset = offset; \
848 __entry->count = count; \
849 __entry->flags = flags; \
850 __entry->startoff = irec ? irec->br_startoff : 0; \
851 __entry->startblock = irec ? irec->br_startblock : 0; \
852 __entry->blockcount = irec ? irec->br_blockcount : 0; \
853 ), \
854 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
855 "offset 0x%llx count %zd flags %s " \
856 "startoff 0x%llx startblock %s blockcount 0x%llx", \
857 MAJOR(__entry->dev), MINOR(__entry->dev), \
858 __entry->ino, \
859 __entry->size, \
860 __entry->new_size, \
861 __entry->offset, \
862 __entry->count, \
863 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
864 __entry->startoff, \
865 xfs_fmtfsblock(__entry->startblock), \
866 __entry->blockcount) \
867)
868DEFINE_IOMAP_EVENT(xfs_iomap_enter);
869DEFINE_IOMAP_EVENT(xfs_iomap_found);
870DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
871
872#define DEFINE_SIMPLE_IO_EVENT(name) \
873TRACE_EVENT(name, \
874 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
875 TP_ARGS(ip, offset, count), \
876 TP_STRUCT__entry( \
877 __field(dev_t, dev) \
878 __field(xfs_ino_t, ino) \
879 __field(loff_t, size) \
880 __field(loff_t, new_size) \
881 __field(loff_t, offset) \
882 __field(size_t, count) \
883 ), \
884 TP_fast_assign( \
885 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
886 __entry->ino = ip->i_ino; \
887 __entry->size = ip->i_d.di_size; /* size from the in-core dinode */ \
888 __entry->new_size = ip->i_new_size; \
889 __entry->offset = offset; \
890 __entry->count = count; \
891 ), \
892 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
893 "offset 0x%llx count %zd", \
894 MAJOR(__entry->dev), MINOR(__entry->dev), \
895 __entry->ino, \
896 __entry->size, \
897 __entry->new_size, \
898 __entry->offset, \
899 __entry->count) \
900) /* no trailing ';' - each DEFINE_SIMPLE_IO_EVENT(x); use site supplies it */
901DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
902DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
903
904
905TRACE_EVENT(xfs_itruncate_start,
906 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
907 xfs_off_t toss_start, xfs_off_t toss_finish),
908 TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
909 TP_STRUCT__entry(
910 __field(dev_t, dev)
911 __field(xfs_ino_t, ino)
912 __field(xfs_fsize_t, size)
913 __field(xfs_fsize_t, new_size)
914 __field(xfs_off_t, toss_start)
915 __field(xfs_off_t, toss_finish)
916 __field(int, flag)
917 ),
918 TP_fast_assign(
919 __entry->dev = VFS_I(ip)->i_sb->s_dev;
920 __entry->ino = ip->i_ino;
921 __entry->size = ip->i_d.di_size;
922 __entry->new_size = new_size;
923 __entry->toss_start = toss_start;
924 __entry->toss_finish = toss_finish;
925 __entry->flag = flag;
926 ),
927 TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
928 "toss start 0x%llx toss finish 0x%llx",
929 MAJOR(__entry->dev), MINOR(__entry->dev),
930 __entry->ino,
931 __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
932 __entry->size,
933 __entry->new_size,
934 __entry->toss_start,
935 __entry->toss_finish)
936);
937
938DECLARE_EVENT_CLASS(xfs_itrunc_class,
939 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
940 TP_ARGS(ip, new_size),
941 TP_STRUCT__entry(
942 __field(dev_t, dev)
943 __field(xfs_ino_t, ino)
944 __field(xfs_fsize_t, size)
945 __field(xfs_fsize_t, new_size)
946 ),
947 TP_fast_assign(
948 __entry->dev = VFS_I(ip)->i_sb->s_dev;
949 __entry->ino = ip->i_ino;
950 __entry->size = ip->i_d.di_size;
951 __entry->new_size = new_size;
952 ),
953 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
954 MAJOR(__entry->dev), MINOR(__entry->dev),
955 __entry->ino,
956 __entry->size,
957 __entry->new_size)
958)
959
960#define DEFINE_ITRUNC_EVENT(name) \
961DEFINE_EVENT(xfs_itrunc_class, name, \
962 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
963 TP_ARGS(ip, new_size))
964DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
965DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
966
967TRACE_EVENT(xfs_pagecache_inval,
968 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
969 TP_ARGS(ip, start, finish),
970 TP_STRUCT__entry(
971 __field(dev_t, dev)
972 __field(xfs_ino_t, ino)
973 __field(xfs_fsize_t, size)
974 __field(xfs_off_t, start)
975 __field(xfs_off_t, finish)
976 ),
977 TP_fast_assign(
978 __entry->dev = VFS_I(ip)->i_sb->s_dev;
979 __entry->ino = ip->i_ino;
980 __entry->size = ip->i_d.di_size;
981 __entry->start = start;
982 __entry->finish = finish;
983 ),
984 TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
985 MAJOR(__entry->dev), MINOR(__entry->dev),
986 __entry->ino,
987 __entry->size,
988 __entry->start,
989 __entry->finish)
990);
991
992TRACE_EVENT(xfs_bunmap, /* records inode, size, file block range, flags and caller */
993 TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
994 int flags, unsigned long caller_ip),
995 TP_ARGS(ip, bno, len, flags, caller_ip),
996 TP_STRUCT__entry(
997 __field(dev_t, dev)
998 __field(xfs_ino_t, ino)
999 __field(xfs_fsize_t, size)
1000 __field(xfs_fileoff_t, bno)
1001 __field(xfs_filblks_t, len)
1002 __field(unsigned long, caller_ip)
1003 __field(int, flags)
1004 ),
1005 TP_fast_assign(
1006 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1007 __entry->ino = ip->i_ino;
1008 __entry->size = ip->i_d.di_size;
1009 __entry->bno = bno;
1010 __entry->len = len;
1011 __entry->caller_ip = caller_ip;
1012 __entry->flags = flags;
1013 ),
1014 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx " /* trailing space keeps "len" and "flags" apart */
1015 "flags %s caller %pf",
1016 MAJOR(__entry->dev), MINOR(__entry->dev),
1017 __entry->ino,
1018 __entry->size,
1019 __entry->bno,
1020 __entry->len,
1021 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
1022 (void *)__entry->caller_ip)
1023
1024);
1025
1026TRACE_EVENT(xfs_alloc_busy,
1027 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1028 xfs_extlen_t len, int slot),
1029 TP_ARGS(mp, agno, agbno, len, slot),
1030 TP_STRUCT__entry(
1031 __field(dev_t, dev)
1032 __field(xfs_agnumber_t, agno)
1033 __field(xfs_agblock_t, agbno)
1034 __field(xfs_extlen_t, len)
1035 __field(int, slot)
1036 ),
1037 TP_fast_assign(
1038 __entry->dev = mp->m_super->s_dev;
1039 __entry->agno = agno;
1040 __entry->agbno = agbno;
1041 __entry->len = len;
1042 __entry->slot = slot;
1043 ),
1044 TP_printk("dev %d:%d agno %u agbno %u len %u slot %d",
1045 MAJOR(__entry->dev), MINOR(__entry->dev),
1046 __entry->agno,
1047 __entry->agbno,
1048 __entry->len,
1049 __entry->slot)
1050
1051);
1052
1053#define XFS_BUSY_STATES \
1054 { 0, "found" }, \
1055 { 1, "missing" }
1056
1057TRACE_EVENT(xfs_alloc_unbusy,
1058 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1059 int slot, int found),
1060 TP_ARGS(mp, agno, slot, found),
1061 TP_STRUCT__entry(
1062 __field(dev_t, dev)
1063 __field(xfs_agnumber_t, agno)
1064 __field(int, slot)
1065 __field(int, found)
1066 ),
1067 TP_fast_assign(
1068 __entry->dev = mp->m_super->s_dev;
1069 __entry->agno = agno;
1070 __entry->slot = slot;
1071 __entry->found = found;
1072 ),
1073 TP_printk("dev %d:%d agno %u slot %d %s",
1074 MAJOR(__entry->dev), MINOR(__entry->dev),
1075 __entry->agno,
1076 __entry->slot,
1077 __print_symbolic(__entry->found, XFS_BUSY_STATES))
1078);
1079
1080TRACE_EVENT(xfs_alloc_busysearch,
1081 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1082 xfs_extlen_t len, xfs_lsn_t lsn),
1083 TP_ARGS(mp, agno, agbno, len, lsn),
1084 TP_STRUCT__entry(
1085 __field(dev_t, dev)
1086 __field(xfs_agnumber_t, agno)
1087 __field(xfs_agblock_t, agbno)
1088 __field(xfs_extlen_t, len)
1089 __field(xfs_lsn_t, lsn)
1090 ),
1091 TP_fast_assign(
1092 __entry->dev = mp->m_super->s_dev;
1093 __entry->agno = agno;
1094 __entry->agbno = agbno;
1095 __entry->len = len;
1096 __entry->lsn = lsn;
1097 ),
1098 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1099 MAJOR(__entry->dev), MINOR(__entry->dev),
1100 __entry->agno,
1101 __entry->agbno,
1102 __entry->len,
1103 __entry->lsn)
1104);
1105
1106TRACE_EVENT(xfs_agf, /* snapshot of the AGF header fields plus the flags and caller passed in */
1107 TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
1108 unsigned long caller_ip),
1109 TP_ARGS(mp, agf, flags, caller_ip),
1110 TP_STRUCT__entry(
1111 __field(dev_t, dev)
1112 __field(xfs_agnumber_t, agno)
1113 __field(int, flags)
1114 __field(__u32, length)
1115 __field(__u32, bno_root)
1116 __field(__u32, cnt_root)
1117 __field(__u32, bno_level)
1118 __field(__u32, cnt_level)
1119 __field(__u32, flfirst)
1120 __field(__u32, fllast)
1121 __field(__u32, flcount)
1122 __field(__u32, freeblks)
1123 __field(__u32, longest)
1124 __field(unsigned long, caller_ip)
1125 ),
1126 TP_fast_assign( /* each assignment is its own statement; AGF fields go through be32_to_cpu */
1127 __entry->dev = mp->m_super->s_dev;
1128 __entry->agno = be32_to_cpu(agf->agf_seqno);
1129 __entry->flags = flags;
1130 __entry->length = be32_to_cpu(agf->agf_length);
1131 __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
1132 __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
1133 __entry->bno_level =
1134 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
1135 __entry->cnt_level =
1136 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
1137 __entry->flfirst = be32_to_cpu(agf->agf_flfirst);
1138 __entry->fllast = be32_to_cpu(agf->agf_fllast);
1139 __entry->flcount = be32_to_cpu(agf->agf_flcount);
1140 __entry->freeblks = be32_to_cpu(agf->agf_freeblks);
1141 __entry->longest = be32_to_cpu(agf->agf_longest);
1142 __entry->caller_ip = caller_ip;
1143 ),
1144 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1145 "levels b %u c %u flfirst %u fllast %u flcount %u "
1146 "freeblks %u longest %u caller %pf",
1147 MAJOR(__entry->dev), MINOR(__entry->dev),
1148 __entry->agno,
1149 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
1150 __entry->length,
1151 __entry->bno_root,
1152 __entry->cnt_root,
1153 __entry->bno_level,
1154 __entry->cnt_level,
1155 __entry->flfirst,
1156 __entry->fllast,
1157 __entry->flcount,
1158 __entry->freeblks,
1159 __entry->longest,
1160 (void *)__entry->caller_ip)
1161);
1162
1163TRACE_EVENT(xfs_free_extent,
1164 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1165 xfs_extlen_t len, bool isfl, int haveleft, int haveright),
1166 TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
1167 TP_STRUCT__entry(
1168 __field(dev_t, dev)
1169 __field(xfs_agnumber_t, agno)
1170 __field(xfs_agblock_t, agbno)
1171 __field(xfs_extlen_t, len)
1172 __field(int, isfl)
1173 __field(int, haveleft)
1174 __field(int, haveright)
1175 ),
1176 TP_fast_assign(
1177 __entry->dev = mp->m_super->s_dev;
1178 __entry->agno = agno;
1179 __entry->agbno = agbno;
1180 __entry->len = len;
1181 __entry->isfl = isfl;
1182 __entry->haveleft = haveleft;
1183 __entry->haveright = haveright;
1184 ),
1185 TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
1186 MAJOR(__entry->dev), MINOR(__entry->dev),
1187 __entry->agno,
1188 __entry->agbno,
1189 __entry->len,
1190 __entry->isfl,
1191 __entry->haveleft ?
1192 (__entry->haveright ? "both" : "left") :
1193 (__entry->haveright ? "right" : "none"))
1194
1195);
1196
1197DECLARE_EVENT_CLASS(xfs_alloc_class,
1198 TP_PROTO(struct xfs_alloc_arg *args),
1199 TP_ARGS(args),
1200 TP_STRUCT__entry(
1201 __field(dev_t, dev)
1202 __field(xfs_agnumber_t, agno)
1203 __field(xfs_agblock_t, agbno)
1204 __field(xfs_extlen_t, minlen)
1205 __field(xfs_extlen_t, maxlen)
1206 __field(xfs_extlen_t, mod)
1207 __field(xfs_extlen_t, prod)
1208 __field(xfs_extlen_t, minleft)
1209 __field(xfs_extlen_t, total)
1210 __field(xfs_extlen_t, alignment)
1211 __field(xfs_extlen_t, minalignslop)
1212 __field(xfs_extlen_t, len)
1213 __field(short, type)
1214 __field(short, otype)
1215 __field(char, wasdel)
1216 __field(char, wasfromfl)
1217 __field(char, isfl)
1218 __field(char, userdata)
1219 __field(xfs_fsblock_t, firstblock)
1220 ),
1221 TP_fast_assign(
1222 __entry->dev = args->mp->m_super->s_dev;
1223 __entry->agno = args->agno;
1224 __entry->agbno = args->agbno;
1225 __entry->minlen = args->minlen;
1226 __entry->maxlen = args->maxlen;
1227 __entry->mod = args->mod;
1228 __entry->prod = args->prod;
1229 __entry->minleft = args->minleft;
1230 __entry->total = args->total;
1231 __entry->alignment = args->alignment;
1232 __entry->minalignslop = args->minalignslop;
1233 __entry->len = args->len;
1234 __entry->type = args->type;
1235 __entry->otype = args->otype;
1236 __entry->wasdel = args->wasdel;
1237 __entry->wasfromfl = args->wasfromfl;
1238 __entry->isfl = args->isfl;
1239 __entry->userdata = args->userdata;
1240 __entry->firstblock = args->firstblock;
1241 ),
1242 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1243 "prod %u minleft %u total %u alignment %u minalignslop %u "
1244 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1245 "userdata %d firstblock 0x%llx",
1246 MAJOR(__entry->dev), MINOR(__entry->dev),
1247 __entry->agno,
1248 __entry->agbno,
1249 __entry->minlen,
1250 __entry->maxlen,
1251 __entry->mod,
1252 __entry->prod,
1253 __entry->minleft,
1254 __entry->total,
1255 __entry->alignment,
1256 __entry->minalignslop,
1257 __entry->len,
1258 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1259 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1260 __entry->wasdel,
1261 __entry->wasfromfl,
1262 __entry->isfl,
1263 __entry->userdata,
1264 __entry->firstblock)
1265)
1266
1267#define DEFINE_ALLOC_EVENT(name) \
1268DEFINE_EVENT(xfs_alloc_class, name, \
1269 TP_PROTO(struct xfs_alloc_arg *args), \
1270 TP_ARGS(args))
1271DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1272DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1273DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
1274DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1275DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1276DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1277DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1278DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1279DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1280DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1281DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1282DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1283DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1284DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1285DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
1286DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
1287DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
1288DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
1289DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1290DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1291DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1292
1293DECLARE_EVENT_CLASS(xfs_dir2_class,
1294 TP_PROTO(struct xfs_da_args *args),
1295 TP_ARGS(args),
1296 TP_STRUCT__entry(
1297 __field(dev_t, dev)
1298 __field(xfs_ino_t, ino)
1299 __dynamic_array(char, name, args->namelen)
1300 __field(int, namelen)
1301 __field(xfs_dahash_t, hashval)
1302 __field(xfs_ino_t, inumber)
1303 __field(int, op_flags)
1304 ),
1305 TP_fast_assign(
1306 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1307 __entry->ino = args->dp->i_ino;
1308 if (args->namelen)
1309 memcpy(__get_str(name), args->name, args->namelen);
1310 __entry->namelen = args->namelen;
1311 __entry->hashval = args->hashval;
1312 __entry->inumber = args->inumber;
1313 __entry->op_flags = args->op_flags;
1314 ),
1315 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1316 "inumber 0x%llx op_flags %s",
1317 MAJOR(__entry->dev), MINOR(__entry->dev),
1318 __entry->ino,
1319 __entry->namelen,
1320 __entry->namelen ? __get_str(name) : NULL,
1321 __entry->namelen,
1322 __entry->hashval,
1323 __entry->inumber,
1324 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1325)
1326
1327#define DEFINE_DIR2_EVENT(name) \
1328DEFINE_EVENT(xfs_dir2_class, name, \
1329 TP_PROTO(struct xfs_da_args *args), \
1330 TP_ARGS(args))
1331DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1332DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1333DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1334DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1335DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1336DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1337DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1338DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1339DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1340DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1341DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1342DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1343DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1344DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1345DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
1346DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
1347DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
1348DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
1349DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
1350DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
1351DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
1352DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
1353DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1354DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1355DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
1356
1357DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1358 TP_PROTO(struct xfs_da_args *args, int idx),
1359 TP_ARGS(args, idx),
1360 TP_STRUCT__entry(
1361 __field(dev_t, dev)
1362 __field(xfs_ino_t, ino)
1363 __field(int, op_flags)
1364 __field(int, idx)
1365 ),
1366 TP_fast_assign(
1367 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1368 __entry->ino = args->dp->i_ino;
1369 __entry->op_flags = args->op_flags;
1370 __entry->idx = idx;
1371 ),
1372 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
1373 MAJOR(__entry->dev), MINOR(__entry->dev),
1374 __entry->ino,
1375 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1376 __entry->idx)
1377)
1378
1379#define DEFINE_DIR2_SPACE_EVENT(name) \
1380DEFINE_EVENT(xfs_dir2_space_class, name, \
1381 TP_PROTO(struct xfs_da_args *args, int idx), \
1382 TP_ARGS(args, idx))
1383DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
1384DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
1385DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
1386DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
1387
1388TRACE_EVENT(xfs_dir2_leafn_moveents,
1389 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
1390 TP_ARGS(args, src_idx, dst_idx, count),
1391 TP_STRUCT__entry(
1392 __field(dev_t, dev)
1393 __field(xfs_ino_t, ino)
1394 __field(int, op_flags)
1395 __field(int, src_idx)
1396 __field(int, dst_idx)
1397 __field(int, count)
1398 ),
1399 TP_fast_assign(
1400 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1401 __entry->ino = args->dp->i_ino;
1402 __entry->op_flags = args->op_flags;
1403 __entry->src_idx = src_idx;
1404 __entry->dst_idx = dst_idx;
1405 __entry->count = count;
1406 ),
1407 TP_printk("dev %d:%d ino 0x%llx op_flags %s "
1408 "src_idx %d dst_idx %d count %d",
1409 MAJOR(__entry->dev), MINOR(__entry->dev),
1410 __entry->ino,
1411 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1412 __entry->src_idx,
1413 __entry->dst_idx,
1414 __entry->count)
1415);
1416
1417#endif /* _TRACE_XFS_H */
1418
1419#undef TRACE_INCLUDE_PATH
1420#define TRACE_INCLUDE_PATH .
1421#define TRACE_INCLUDE_FILE xfs_trace
1422#include <trace/define_trace.h>
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 00cabf5354d2..7c220b4227bc 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -39,6 +39,10 @@ struct attrlist_cursor_kern;
39#define IO_ISDIRECT 0x00004 /* bypass page cache */ 39#define IO_ISDIRECT 0x00004 /* bypass page cache */
40#define IO_INVIS 0x00020 /* don't update inode timestamps */ 40#define IO_INVIS 0x00020 /* don't update inode timestamps */
41 41
42#define XFS_IO_FLAGS \
43 { IO_ISDIRECT, "DIRECT" }, \
44 { IO_INVIS, "INVIS"}
45
42/* 46/*
43 * Flush/Invalidate options for vop_toss/flush/flushinval_pages. 47 * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
44 */ 48 */
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc1..0b1878857fc3 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
30 30
31 31
32static int 32static int
33__xfs_xattr_get(struct inode *inode, const char *name, 33xfs_xattr_get(struct dentry *dentry, const char *name,
34 void *value, size_t size, int xflags) 34 void *value, size_t size, int xflags)
35{ 35{
36 struct xfs_inode *ip = XFS_I(inode); 36 struct xfs_inode *ip = XFS_I(dentry->d_inode);
37 int error, asize = size; 37 int error, asize = size;
38 38
39 if (strcmp(name, "") == 0) 39 if (strcmp(name, "") == 0)
@@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name,
52} 52}
53 53
54static int 54static int
55__xfs_xattr_set(struct inode *inode, const char *name, const void *value, 55xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
56 size_t size, int flags, int xflags) 56 size_t size, int flags, int xflags)
57{ 57{
58 struct xfs_inode *ip = XFS_I(inode); 58 struct xfs_inode *ip = XFS_I(dentry->d_inode);
59 59
60 if (strcmp(name, "") == 0) 60 if (strcmp(name, "") == 0)
61 return -EINVAL; 61 return -EINVAL;
@@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
71 return -xfs_attr_set(ip, name, (void *)value, size, xflags); 71 return -xfs_attr_set(ip, name, (void *)value, size, xflags);
72} 72}
73 73
74static int
75xfs_xattr_user_get(struct inode *inode, const char *name,
76 void *value, size_t size)
77{
78 return __xfs_xattr_get(inode, name, value, size, 0);
79}
80
81static int
82xfs_xattr_user_set(struct inode *inode, const char *name,
83 const void *value, size_t size, int flags)
84{
85 return __xfs_xattr_set(inode, name, value, size, flags, 0);
86}
87
88static struct xattr_handler xfs_xattr_user_handler = { 74static struct xattr_handler xfs_xattr_user_handler = {
89 .prefix = XATTR_USER_PREFIX, 75 .prefix = XATTR_USER_PREFIX,
90 .get = xfs_xattr_user_get, 76 .flags = 0, /* no flags implies user namespace */
91 .set = xfs_xattr_user_set, 77 .get = xfs_xattr_get,
78 .set = xfs_xattr_set,
92}; 79};
93 80
94
95static int
96xfs_xattr_trusted_get(struct inode *inode, const char *name,
97 void *value, size_t size)
98{
99 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
100}
101
102static int
103xfs_xattr_trusted_set(struct inode *inode, const char *name,
104 const void *value, size_t size, int flags)
105{
106 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
107}
108
109static struct xattr_handler xfs_xattr_trusted_handler = { 81static struct xattr_handler xfs_xattr_trusted_handler = {
110 .prefix = XATTR_TRUSTED_PREFIX, 82 .prefix = XATTR_TRUSTED_PREFIX,
111 .get = xfs_xattr_trusted_get, 83 .flags = ATTR_ROOT,
112 .set = xfs_xattr_trusted_set, 84 .get = xfs_xattr_get,
85 .set = xfs_xattr_set,
113}; 86};
114 87
115
116static int
117xfs_xattr_secure_get(struct inode *inode, const char *name,
118 void *value, size_t size)
119{
120 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
121}
122
123static int
124xfs_xattr_secure_set(struct inode *inode, const char *name,
125 const void *value, size_t size, int flags)
126{
127 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
128}
129
130static struct xattr_handler xfs_xattr_security_handler = { 88static struct xattr_handler xfs_xattr_security_handler = {
131 .prefix = XATTR_SECURITY_PREFIX, 89 .prefix = XATTR_SECURITY_PREFIX,
132 .get = xfs_xattr_secure_get, 90 .flags = ATTR_SECURE,
133 .set = xfs_xattr_secure_set, 91 .get = xfs_xattr_get,
92 .set = xfs_xattr_set,
134}; 93};
135 94
136
137struct xattr_handler *xfs_xattr_handlers[] = { 95struct xattr_handler *xfs_xattr_handlers[] = {
138 &xfs_xattr_user_handler, 96 &xfs_xattr_user_handler,
139 &xfs_xattr_trusted_handler, 97 &xfs_xattr_trusted_handler,
140 &xfs_xattr_security_handler, 98 &xfs_xattr_security_handler,
141#ifdef CONFIG_XFS_POSIX_ACL 99#ifdef CONFIG_XFS_POSIX_ACL
142 &xfs_xattr_system_handler, 100 &xfs_xattr_acl_access_handler,
101 &xfs_xattr_acl_default_handler,
143#endif 102#endif
144 NULL 103 NULL
145}; 104};
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 2f3f2229eaaf..d7c7eea09fc2 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_trans_priv.h" 48#include "xfs_trans_priv.h"
49#include "xfs_qm.h" 49#include "xfs_qm.h"
50#include "xfs_trace.h"
50 51
51 52
52/* 53/*
@@ -112,10 +113,7 @@ xfs_qm_dqinit(
112 init_completion(&dqp->q_flush); 113 init_completion(&dqp->q_flush);
113 complete(&dqp->q_flush); 114 complete(&dqp->q_flush);
114 115
115#ifdef XFS_DQUOT_TRACE 116 trace_xfs_dqinit(dqp);
116 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
117 xfs_dqtrace_entry(dqp, "DQINIT");
118#endif
119 } else { 117 } else {
120 /* 118 /*
121 * Only the q_core portion was zeroed in dqreclaim_one(). 119 * Only the q_core portion was zeroed in dqreclaim_one().
@@ -136,10 +134,7 @@ xfs_qm_dqinit(
136 dqp->q_hash = NULL; 134 dqp->q_hash = NULL;
137 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 135 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
138 136
139#ifdef XFS_DQUOT_TRACE 137 trace_xfs_dqreuse(dqp);
140 ASSERT(dqp->q_trace);
141 xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
142#endif
143 } 138 }
144 139
145 /* 140 /*
@@ -167,13 +162,8 @@ xfs_qm_dqdestroy(
167 162
168 mutex_destroy(&dqp->q_qlock); 163 mutex_destroy(&dqp->q_qlock);
169 sv_destroy(&dqp->q_pinwait); 164 sv_destroy(&dqp->q_pinwait);
170
171#ifdef XFS_DQUOT_TRACE
172 if (dqp->q_trace)
173 ktrace_free(dqp->q_trace);
174 dqp->q_trace = NULL;
175#endif
176 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 165 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
166
177 atomic_dec(&xfs_Gqm->qm_totaldquots); 167 atomic_dec(&xfs_Gqm->qm_totaldquots);
178} 168}
179 169
@@ -195,49 +185,6 @@ xfs_qm_dqinit_core(
195 d->dd_diskdq.d_flags = type; 185 d->dd_diskdq.d_flags = type;
196} 186}
197 187
198
199#ifdef XFS_DQUOT_TRACE
200/*
201 * Dquot tracing for debugging.
202 */
203/* ARGSUSED */
204void
205__xfs_dqtrace_entry(
206 xfs_dquot_t *dqp,
207 char *func,
208 void *retaddr,
209 xfs_inode_t *ip)
210{
211 xfs_dquot_t *udqp = NULL;
212 xfs_ino_t ino = 0;
213
214 ASSERT(dqp->q_trace);
215 if (ip) {
216 ino = ip->i_ino;
217 udqp = ip->i_udquot;
218 }
219 ktrace_enter(dqp->q_trace,
220 (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
221 (void *)func,
222 (void *)(__psint_t)dqp->q_nrefs,
223 (void *)(__psint_t)dqp->dq_flags,
224 (void *)(__psint_t)dqp->q_res_bcount,
225 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount),
226 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount),
227 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit),
228 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit),
229 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit),
230 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit),
231 (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id),
232 (void *)(__psint_t)current_pid(),
233 (void *)(__psint_t)ino,
234 (void *)(__psint_t)retaddr,
235 (void *)(__psint_t)udqp);
236 return;
237}
238#endif
239
240
241/* 188/*
242 * If default limits are in force, push them into the dquot now. 189 * If default limits are in force, push them into the dquot now.
243 * We overwrite the dquot limits only if they are zero and this 190 * We overwrite the dquot limits only if they are zero and this
@@ -425,7 +372,8 @@ xfs_qm_dqalloc(
425 xfs_trans_t *tp = *tpp; 372 xfs_trans_t *tp = *tpp;
426 373
427 ASSERT(tp != NULL); 374 ASSERT(tp != NULL);
428 xfs_dqtrace_entry(dqp, "DQALLOC"); 375
376 trace_xfs_dqalloc(dqp);
429 377
430 /* 378 /*
431 * Initialize the bmap freelist prior to calling bmapi code. 379 * Initialize the bmap freelist prior to calling bmapi code.
@@ -612,7 +560,8 @@ xfs_qm_dqtobp(
612 * (in which case we already have the buf). 560 * (in which case we already have the buf).
613 */ 561 */
614 if (! newdquot) { 562 if (! newdquot) {
615 xfs_dqtrace_entry(dqp, "DQTOBP READBUF"); 563 trace_xfs_dqtobp_read(dqp);
564
616 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 565 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
617 dqp->q_blkno, 566 dqp->q_blkno,
618 XFS_QI_DQCHUNKLEN(mp), 567 XFS_QI_DQCHUNKLEN(mp),
@@ -670,11 +619,12 @@ xfs_qm_dqread(
670 619
671 ASSERT(tpp); 620 ASSERT(tpp);
672 621
622 trace_xfs_dqread(dqp);
623
673 /* 624 /*
674 * get a pointer to the on-disk dquot and the buffer containing it 625 * get a pointer to the on-disk dquot and the buffer containing it
675 * dqp already knows its own type (GROUP/USER). 626 * dqp already knows its own type (GROUP/USER).
676 */ 627 */
677 xfs_dqtrace_entry(dqp, "DQREAD");
678 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { 628 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
679 return (error); 629 return (error);
680 } 630 }
@@ -763,7 +713,7 @@ xfs_qm_idtodq(
763 * or if the dquot didn't exist on disk and we ask to 713 * or if the dquot didn't exist on disk and we ask to
764 * allocate (ENOENT). 714 * allocate (ENOENT).
765 */ 715 */
766 xfs_dqtrace_entry(dqp, "DQREAD FAIL"); 716 trace_xfs_dqread_fail(dqp);
767 cancelflags |= XFS_TRANS_ABORT; 717 cancelflags |= XFS_TRANS_ABORT;
768 goto error0; 718 goto error0;
769 } 719 }
@@ -817,7 +767,8 @@ xfs_qm_dqlookup(
817 * id can't be modified without the hashlock anyway. 767 * id can't be modified without the hashlock anyway.
818 */ 768 */
819 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { 769 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
820 xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP"); 770 trace_xfs_dqlookup_found(dqp);
771
821 /* 772 /*
822 * All in core dquots must be on the dqlist of mp 773 * All in core dquots must be on the dqlist of mp
823 */ 774 */
@@ -827,7 +778,7 @@ xfs_qm_dqlookup(
827 if (dqp->q_nrefs == 0) { 778 if (dqp->q_nrefs == 0) {
828 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); 779 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
829 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 780 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
830 xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT"); 781 trace_xfs_dqlookup_want(dqp);
831 782
832 /* 783 /*
833 * We may have raced with dqreclaim_one() 784 * We may have raced with dqreclaim_one()
@@ -857,8 +808,7 @@ xfs_qm_dqlookup(
857 /* 808 /*
858 * take it off the freelist 809 * take it off the freelist
859 */ 810 */
860 xfs_dqtrace_entry(dqp, 811 trace_xfs_dqlookup_freelist(dqp);
861 "DQLOOKUP: TAKEOFF FL");
862 XQM_FREELIST_REMOVE(dqp); 812 XQM_FREELIST_REMOVE(dqp);
863 /* xfs_qm_freelist_print(&(xfs_Gqm-> 813 /* xfs_qm_freelist_print(&(xfs_Gqm->
864 qm_dqfreelist), 814 qm_dqfreelist),
@@ -878,8 +828,7 @@ xfs_qm_dqlookup(
878 */ 828 */
879 ASSERT(mutex_is_locked(&qh->qh_lock)); 829 ASSERT(mutex_is_locked(&qh->qh_lock));
880 if (dqp->HL_PREVP != &qh->qh_next) { 830 if (dqp->HL_PREVP != &qh->qh_next) {
881 xfs_dqtrace_entry(dqp, 831 trace_xfs_dqlookup_move(dqp);
882 "DQLOOKUP: HASH MOVETOFRONT");
883 if ((d = dqp->HL_NEXT)) 832 if ((d = dqp->HL_NEXT))
884 d->HL_PREVP = dqp->HL_PREVP; 833 d->HL_PREVP = dqp->HL_PREVP;
885 *(dqp->HL_PREVP) = d; 834 *(dqp->HL_PREVP) = d;
@@ -889,7 +838,7 @@ xfs_qm_dqlookup(
889 dqp->HL_PREVP = &qh->qh_next; 838 dqp->HL_PREVP = &qh->qh_next;
890 qh->qh_next = dqp; 839 qh->qh_next = dqp;
891 } 840 }
892 xfs_dqtrace_entry(dqp, "LOOKUP END"); 841 trace_xfs_dqlookup_done(dqp);
893 *O_dqpp = dqp; 842 *O_dqpp = dqp;
894 ASSERT(mutex_is_locked(&qh->qh_lock)); 843 ASSERT(mutex_is_locked(&qh->qh_lock));
895 return (0); 844 return (0);
@@ -971,7 +920,7 @@ xfs_qm_dqget(
971 ASSERT(*O_dqpp); 920 ASSERT(*O_dqpp);
972 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); 921 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
973 mutex_unlock(&h->qh_lock); 922 mutex_unlock(&h->qh_lock);
974 xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); 923 trace_xfs_dqget_hit(*O_dqpp);
975 return (0); /* success */ 924 return (0); /* success */
976 } 925 }
977 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); 926 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
@@ -1104,7 +1053,7 @@ xfs_qm_dqget(
1104 mutex_unlock(&h->qh_lock); 1053 mutex_unlock(&h->qh_lock);
1105 dqret: 1054 dqret:
1106 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1055 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1107 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1056 trace_xfs_dqget_miss(dqp);
1108 *O_dqpp = dqp; 1057 *O_dqpp = dqp;
1109 return (0); 1058 return (0);
1110} 1059}
@@ -1124,7 +1073,8 @@ xfs_qm_dqput(
1124 1073
1125 ASSERT(dqp->q_nrefs > 0); 1074 ASSERT(dqp->q_nrefs > 0);
1126 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1075 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1127 xfs_dqtrace_entry(dqp, "DQPUT"); 1076
1077 trace_xfs_dqput(dqp);
1128 1078
1129 if (dqp->q_nrefs != 1) { 1079 if (dqp->q_nrefs != 1) {
1130 dqp->q_nrefs--; 1080 dqp->q_nrefs--;
@@ -1137,7 +1087,7 @@ xfs_qm_dqput(
1137 * in the right order; but try to get it out-of-order first 1087 * in the right order; but try to get it out-of-order first
1138 */ 1088 */
1139 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 1089 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
1140 xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT"); 1090 trace_xfs_dqput_wait(dqp);
1141 xfs_dqunlock(dqp); 1091 xfs_dqunlock(dqp);
1142 xfs_qm_freelist_lock(xfs_Gqm); 1092 xfs_qm_freelist_lock(xfs_Gqm);
1143 xfs_dqlock(dqp); 1093 xfs_dqlock(dqp);
@@ -1148,7 +1098,8 @@ xfs_qm_dqput(
1148 1098
1149 /* We can't depend on nrefs being == 1 here */ 1099 /* We can't depend on nrefs being == 1 here */
1150 if (--dqp->q_nrefs == 0) { 1100 if (--dqp->q_nrefs == 0) {
1151 xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST"); 1101 trace_xfs_dqput_free(dqp);
1102
1152 /* 1103 /*
1153 * insert at end of the freelist. 1104 * insert at end of the freelist.
1154 */ 1105 */
@@ -1196,7 +1147,7 @@ xfs_qm_dqrele(
1196 if (!dqp) 1147 if (!dqp)
1197 return; 1148 return;
1198 1149
1199 xfs_dqtrace_entry(dqp, "DQRELE"); 1150 trace_xfs_dqrele(dqp);
1200 1151
1201 xfs_dqlock(dqp); 1152 xfs_dqlock(dqp);
1202 /* 1153 /*
@@ -1229,7 +1180,7 @@ xfs_qm_dqflush(
1229 1180
1230 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1181 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1231 ASSERT(!completion_done(&dqp->q_flush)); 1182 ASSERT(!completion_done(&dqp->q_flush));
1232 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1183 trace_xfs_dqflush(dqp);
1233 1184
1234 /* 1185 /*
1235 * If not dirty, or it's pinned and we are not supposed to 1186 * If not dirty, or it's pinned and we are not supposed to
@@ -1259,7 +1210,6 @@ xfs_qm_dqflush(
1259 * the ondisk-dquot has already been allocated for. 1210 * the ondisk-dquot has already been allocated for.
1260 */ 1211 */
1261 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { 1212 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
1262 xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
1263 ASSERT(error != ENOENT); 1213 ASSERT(error != ENOENT);
1264 /* 1214 /*
1265 * Quotas could have gotten turned off (ESRCH) 1215 * Quotas could have gotten turned off (ESRCH)
@@ -1297,7 +1247,7 @@ xfs_qm_dqflush(
1297 * get stuck waiting in the write for too long. 1247 * get stuck waiting in the write for too long.
1298 */ 1248 */
1299 if (XFS_BUF_ISPINNED(bp)) { 1249 if (XFS_BUF_ISPINNED(bp)) {
1300 xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE"); 1250 trace_xfs_dqflush_force(dqp);
1301 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 1251 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
1302 } 1252 }
1303 1253
@@ -1308,7 +1258,9 @@ xfs_qm_dqflush(
1308 } else { 1258 } else {
1309 error = xfs_bwrite(mp, bp); 1259 error = xfs_bwrite(mp, bp);
1310 } 1260 }
1311 xfs_dqtrace_entry(dqp, "DQFLUSH END"); 1261
1262 trace_xfs_dqflush_done(dqp);
1263
1312 /* 1264 /*
1313 * dqp is still locked, but caller is free to unlock it now. 1265 * dqp is still locked, but caller is free to unlock it now.
1314 */ 1266 */
@@ -1483,7 +1435,7 @@ xfs_qm_dqpurge(
1483 */ 1435 */
1484 if (XFS_DQ_IS_DIRTY(dqp)) { 1436 if (XFS_DQ_IS_DIRTY(dqp)) {
1485 int error; 1437 int error;
1486 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); 1438
1487 /* dqflush unlocks dqflock */ 1439 /* dqflush unlocks dqflock */
1488 /* 1440 /*
1489 * Given that dqpurge is a very rare occurrence, it is OK 1441 * Given that dqpurge is a very rare occurrence, it is OK
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index a2c16bcee90b..a0f7da586d1b 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -85,9 +85,6 @@ typedef struct xfs_dquot {
85 struct completion q_flush; /* flush completion queue */ 85 struct completion q_flush; /* flush completion queue */
86 atomic_t q_pincount; /* dquot pin count */ 86 atomic_t q_pincount; /* dquot pin count */
87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ 87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88#ifdef XFS_DQUOT_TRACE
89 struct ktrace *q_trace; /* trace header structure */
90#endif
91} xfs_dquot_t; 88} xfs_dquot_t;
92 89
93 90
@@ -144,24 +141,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
144 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ 141 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
145 (XFS_IS_OQUOTA_ON((d)->q_mount)))) 142 (XFS_IS_OQUOTA_ON((d)->q_mount))))
146 143
147#ifdef XFS_DQUOT_TRACE
148/*
149 * Dquot Tracing stuff.
150 */
151#define DQUOT_TRACE_SIZE 64
152#define DQUOT_KTRACE_ENTRY 1
153
154extern void __xfs_dqtrace_entry(xfs_dquot_t *dqp, char *func,
155 void *, xfs_inode_t *);
156#define xfs_dqtrace_entry_ino(a,b,ip) \
157 __xfs_dqtrace_entry((a), (b), (void*)__return_address, (ip))
158#define xfs_dqtrace_entry(a,b) \
159 __xfs_dqtrace_entry((a), (b), (void*)__return_address, NULL)
160#else
161#define xfs_dqtrace_entry(a,b)
162#define xfs_dqtrace_entry_ino(a,b,ip)
163#endif
164
165#ifdef QUOTADEBUG 144#ifdef QUOTADEBUG
166extern void xfs_qm_dqprint(xfs_dquot_t *); 145extern void xfs_qm_dqprint(xfs_dquot_t *);
167#else 146#else
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 45b1bfef7388..9e627a8b5b0e 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_qm.h" 49#include "xfs_qm.h"
50#include "xfs_trace.h"
50 51
51/* 52/*
52 * The global quota manager. There is only one of these for the entire 53 * The global quota manager. There is only one of these for the entire
@@ -453,7 +454,7 @@ again:
453 xfs_dqunlock(dqp); 454 xfs_dqunlock(dqp);
454 continue; 455 continue;
455 } 456 }
456 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); 457
457 /* XXX a sentinel would be better */ 458 /* XXX a sentinel would be better */
458 recl = XFS_QI_MPLRECLAIMS(mp); 459 recl = XFS_QI_MPLRECLAIMS(mp);
459 if (!xfs_dqflock_nowait(dqp)) { 460 if (!xfs_dqflock_nowait(dqp)) {
@@ -651,7 +652,7 @@ xfs_qm_dqattach_one(
651 */ 652 */
652 dqp = *IO_idqpp; 653 dqp = *IO_idqpp;
653 if (dqp) { 654 if (dqp) {
654 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); 655 trace_xfs_dqattach_found(dqp);
655 return 0; 656 return 0;
656 } 657 }
657 658
@@ -704,7 +705,7 @@ xfs_qm_dqattach_one(
704 if (error) 705 if (error)
705 return error; 706 return error;
706 707
707 xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); 708 trace_xfs_dqattach_get(dqp);
708 709
709 /* 710 /*
710 * dqget may have dropped and re-acquired the ilock, but it guarantees 711 * dqget may have dropped and re-acquired the ilock, but it guarantees
@@ -890,15 +891,15 @@ xfs_qm_dqdetach(
890 if (!(ip->i_udquot || ip->i_gdquot)) 891 if (!(ip->i_udquot || ip->i_gdquot))
891 return; 892 return;
892 893
894 trace_xfs_dquot_dqdetach(ip);
895
893 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); 896 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
894 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); 897 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
895 if (ip->i_udquot) { 898 if (ip->i_udquot) {
896 xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
897 xfs_qm_dqrele(ip->i_udquot); 899 xfs_qm_dqrele(ip->i_udquot);
898 ip->i_udquot = NULL; 900 ip->i_udquot = NULL;
899 } 901 }
900 if (ip->i_gdquot) { 902 if (ip->i_gdquot) {
901 xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
902 xfs_qm_dqrele(ip->i_gdquot); 903 xfs_qm_dqrele(ip->i_gdquot);
903 ip->i_gdquot = NULL; 904 ip->i_gdquot = NULL;
904 } 905 }
@@ -977,7 +978,6 @@ xfs_qm_sync(
977 * across a disk write 978 * across a disk write
978 */ 979 */
979 xfs_qm_mplist_unlock(mp); 980 xfs_qm_mplist_unlock(mp);
980 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
981 error = xfs_qm_dqflush(dqp, flush_flags); 981 error = xfs_qm_dqflush(dqp, flush_flags);
982 xfs_dqunlock(dqp); 982 xfs_dqunlock(dqp);
983 if (error && XFS_FORCED_SHUTDOWN(mp)) 983 if (error && XFS_FORCED_SHUTDOWN(mp))
@@ -1350,7 +1350,8 @@ xfs_qm_reset_dqcounts(
1350 xfs_disk_dquot_t *ddq; 1350 xfs_disk_dquot_t *ddq;
1351 int j; 1351 int j;
1352 1352
1353 xfs_buftrace("RESET DQUOTS", bp); 1353 trace_xfs_reset_dqcounts(bp, _RET_IP_);
1354
1354 /* 1355 /*
1355 * Reset all counters and timers. They'll be 1356 * Reset all counters and timers. They'll be
1356 * started afresh by xfs_qm_quotacheck. 1357 * started afresh by xfs_qm_quotacheck.
@@ -1543,7 +1544,9 @@ xfs_qm_quotacheck_dqadjust(
1543 xfs_qcnt_t rtblks) 1544 xfs_qcnt_t rtblks)
1544{ 1545{
1545 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1546 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1546 xfs_dqtrace_entry(dqp, "QCHECK DQADJUST"); 1547
1548 trace_xfs_dqadjust(dqp);
1549
1547 /* 1550 /*
1548 * Adjust the inode count and the block count to reflect this inode's 1551 * Adjust the inode count and the block count to reflect this inode's
1549 * resource usage. 1552 * resource usage.
@@ -1994,7 +1997,9 @@ xfs_qm_shake_freelist(
1994 */ 1997 */
1995 if (XFS_DQ_IS_DIRTY(dqp)) { 1998 if (XFS_DQ_IS_DIRTY(dqp)) {
1996 int error; 1999 int error;
1997 xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); 2000
2001 trace_xfs_dqshake_dirty(dqp);
2002
1998 /* 2003 /*
1999 * We flush it delayed write, so don't bother 2004 * We flush it delayed write, so don't bother
2000 * releasing the mplock. 2005 * releasing the mplock.
@@ -2038,7 +2043,9 @@ xfs_qm_shake_freelist(
2038 return nreclaimed; 2043 return nreclaimed;
2039 goto tryagain; 2044 goto tryagain;
2040 } 2045 }
2041 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING"); 2046
2047 trace_xfs_dqshake_unlink(dqp);
2048
2042#ifdef QUOTADEBUG 2049#ifdef QUOTADEBUG
2043 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", 2050 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2044 dqp, be32_to_cpu(dqp->q_core.d_id)); 2051 dqp, be32_to_cpu(dqp->q_core.d_id));
@@ -2125,7 +2132,9 @@ xfs_qm_dqreclaim_one(void)
2125 */ 2132 */
2126 if (dqp->dq_flags & XFS_DQ_WANT) { 2133 if (dqp->dq_flags & XFS_DQ_WANT) {
2127 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); 2134 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2128 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT"); 2135
2136 trace_xfs_dqreclaim_want(dqp);
2137
2129 xfs_dqunlock(dqp); 2138 xfs_dqunlock(dqp);
2130 xfs_qm_freelist_unlock(xfs_Gqm); 2139 xfs_qm_freelist_unlock(xfs_Gqm);
2131 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2140 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
@@ -2171,7 +2180,9 @@ xfs_qm_dqreclaim_one(void)
2171 */ 2180 */
2172 if (XFS_DQ_IS_DIRTY(dqp)) { 2181 if (XFS_DQ_IS_DIRTY(dqp)) {
2173 int error; 2182 int error;
2174 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); 2183
2184 trace_xfs_dqreclaim_dirty(dqp);
2185
2175 /* 2186 /*
2176 * We flush it delayed write, so don't bother 2187 * We flush it delayed write, so don't bother
2177 * releasing the freelist lock. 2188 * releasing the freelist lock.
@@ -2194,8 +2205,9 @@ xfs_qm_dqreclaim_one(void)
2194 if (!mutex_trylock(&dqp->q_hash->qh_lock)) 2205 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2195 goto mplistunlock; 2206 goto mplistunlock;
2196 2207
2208 trace_xfs_dqreclaim_unlink(dqp);
2209
2197 ASSERT(dqp->q_nrefs == 0); 2210 ASSERT(dqp->q_nrefs == 0);
2198 xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2199 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); 2211 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2200 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); 2212 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2201 XQM_FREELIST_REMOVE(dqp); 2213 XQM_FREELIST_REMOVE(dqp);
@@ -2430,7 +2442,7 @@ xfs_qm_vop_dqalloc(
2430 } 2442 }
2431 } 2443 }
2432 if (uq) 2444 if (uq)
2433 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip); 2445 trace_xfs_dquot_dqalloc(ip);
2434 2446
2435 xfs_iunlock(ip, lockflags); 2447 xfs_iunlock(ip, lockflags);
2436 if (O_udqpp) 2448 if (O_udqpp)
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d1a3b98a6e6..873e07e29074 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -49,6 +49,7 @@
49#include "xfs_buf_item.h" 49#include "xfs_buf_item.h"
50#include "xfs_utils.h" 50#include "xfs_utils.h"
51#include "xfs_qm.h" 51#include "xfs_qm.h"
52#include "xfs_trace.h"
52 53
53#ifdef DEBUG 54#ifdef DEBUG
54# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args) 55# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
@@ -496,7 +497,6 @@ xfs_qm_scall_setqlim(
496 ASSERT(error != ENOENT); 497 ASSERT(error != ENOENT);
497 return (error); 498 return (error);
498 } 499 }
499 xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET");
500 xfs_trans_dqjoin(tp, dqp); 500 xfs_trans_dqjoin(tp, dqp);
501 ddq = &dqp->q_core; 501 ddq = &dqp->q_core;
502 502
@@ -602,7 +602,6 @@ xfs_qm_scall_setqlim(
602 dqp->dq_flags |= XFS_DQ_DIRTY; 602 dqp->dq_flags |= XFS_DQ_DIRTY;
603 xfs_trans_log_dquot(tp, dqp); 603 xfs_trans_log_dquot(tp, dqp);
604 604
605 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
606 error = xfs_trans_commit(tp, 0); 605 error = xfs_trans_commit(tp, 0);
607 xfs_qm_dqprint(dqp); 606 xfs_qm_dqprint(dqp);
608 xfs_qm_dqrele(dqp); 607 xfs_qm_dqrele(dqp);
@@ -630,7 +629,6 @@ xfs_qm_scall_getquota(
630 return (error); 629 return (error);
631 } 630 }
632 631
633 xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS");
634 /* 632 /*
635 * If everything's NULL, this dquot doesn't quite exist as far as 633 * If everything's NULL, this dquot doesn't quite exist as far as
636 * our utility programs are concerned. 634 * our utility programs are concerned.
@@ -893,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
893 uint flags) 891 uint flags)
894{ 892{
895 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
896 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
897} 895}
898 896
899/*------------------------------------------------------------------------*/ 897/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
deleted file mode 100644
index 2d494c26717f..000000000000
--- a/fs/xfs/support/ktrace.c
+++ /dev/null
@@ -1,323 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <xfs.h>
19
20static kmem_zone_t *ktrace_hdr_zone;
21static kmem_zone_t *ktrace_ent_zone;
22static int ktrace_zentries;
23
24void __init
25ktrace_init(int zentries)
26{
27 ktrace_zentries = roundup_pow_of_two(zentries);
28
29 ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
30 "ktrace_hdr");
31 ASSERT(ktrace_hdr_zone);
32
33 ktrace_ent_zone = kmem_zone_init(ktrace_zentries
34 * sizeof(ktrace_entry_t),
35 "ktrace_ent");
36 ASSERT(ktrace_ent_zone);
37}
38
39void __exit
40ktrace_uninit(void)
41{
42 kmem_zone_destroy(ktrace_hdr_zone);
43 kmem_zone_destroy(ktrace_ent_zone);
44}
45
46/*
47 * ktrace_alloc()
48 *
49 * Allocate a ktrace header and enough buffering for the given
50 * number of entries. Round the number of entries up to a
51 * power of 2 so we can do fast masking to get the index from
52 * the atomic index counter.
53 */
54ktrace_t *
55ktrace_alloc(int nentries, unsigned int __nocast sleep)
56{
57 ktrace_t *ktp;
58 ktrace_entry_t *ktep;
59 int entries;
60
61 ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
62
63 if (ktp == (ktrace_t*)NULL) {
64 /*
65 * KM_SLEEP callers don't expect failure.
66 */
67 if (sleep & KM_SLEEP)
68 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
69
70 return NULL;
71 }
72
73 /*
74 * Special treatment for buffers with the ktrace_zentries entries
75 */
76 entries = roundup_pow_of_two(nentries);
77 if (entries == ktrace_zentries) {
78 ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
79 sleep);
80 } else {
81 ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)),
82 sleep | KM_LARGE);
83 }
84
85 if (ktep == NULL) {
86 /*
87 * KM_SLEEP callers don't expect failure.
88 */
89 if (sleep & KM_SLEEP)
90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
91
92 kmem_free(ktp);
93
94 return NULL;
95 }
96
97 ktp->kt_entries = ktep;
98 ktp->kt_nentries = entries;
99 ASSERT(is_power_of_2(entries));
100 ktp->kt_index_mask = entries - 1;
101 atomic_set(&ktp->kt_index, 0);
102 ktp->kt_rollover = 0;
103 return ktp;
104}
105
106
107/*
108 * ktrace_free()
109 *
110 * Free up the ktrace header and buffer. It is up to the caller
111 * to ensure that no-one is referencing it.
112 */
113void
114ktrace_free(ktrace_t *ktp)
115{
116 if (ktp == (ktrace_t *)NULL)
117 return;
118
119 /*
120 * Special treatment for the Vnode trace buffer.
121 */
122 if (ktp->kt_nentries == ktrace_zentries)
123 kmem_zone_free(ktrace_ent_zone, ktp->kt_entries);
124 else
125 kmem_free(ktp->kt_entries);
126
127 kmem_zone_free(ktrace_hdr_zone, ktp);
128}
129
130
131/*
132 * Enter the given values into the "next" entry in the trace buffer.
133 * kt_index is always the index of the next entry to be filled.
134 */
135void
136ktrace_enter(
137 ktrace_t *ktp,
138 void *val0,
139 void *val1,
140 void *val2,
141 void *val3,
142 void *val4,
143 void *val5,
144 void *val6,
145 void *val7,
146 void *val8,
147 void *val9,
148 void *val10,
149 void *val11,
150 void *val12,
151 void *val13,
152 void *val14,
153 void *val15)
154{
155 int index;
156 ktrace_entry_t *ktep;
157
158 ASSERT(ktp != NULL);
159
160 /*
161 * Grab an entry by pushing the index up to the next one.
162 */
163 index = atomic_add_return(1, &ktp->kt_index);
164 index = (index - 1) & ktp->kt_index_mask;
165 if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
166 ktp->kt_rollover = 1;
167
168 ASSERT((index >= 0) && (index < ktp->kt_nentries));
169
170 ktep = &(ktp->kt_entries[index]);
171
172 ktep->val[0] = val0;
173 ktep->val[1] = val1;
174 ktep->val[2] = val2;
175 ktep->val[3] = val3;
176 ktep->val[4] = val4;
177 ktep->val[5] = val5;
178 ktep->val[6] = val6;
179 ktep->val[7] = val7;
180 ktep->val[8] = val8;
181 ktep->val[9] = val9;
182 ktep->val[10] = val10;
183 ktep->val[11] = val11;
184 ktep->val[12] = val12;
185 ktep->val[13] = val13;
186 ktep->val[14] = val14;
187 ktep->val[15] = val15;
188}
189
190/*
191 * Return the number of entries in the trace buffer.
192 */
193int
194ktrace_nentries(
195 ktrace_t *ktp)
196{
197 int index;
198 if (ktp == NULL)
199 return 0;
200
201 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
202 return (ktp->kt_rollover ? ktp->kt_nentries : index);
203}
204
205/*
206 * ktrace_first()
207 *
208 * This is used to find the start of the trace buffer.
209 * In conjunction with ktrace_next() it can be used to
210 * iterate through the entire trace buffer. This code does
211 * not do any locking because it is assumed that it is called
212 * from the debugger.
213 *
214 * The caller must pass in a pointer to a ktrace_snap
215 * structure in which we will keep some state used to
216 * iterate through the buffer. This state must not touched
217 * by any code outside of this module.
218 */
219ktrace_entry_t *
220ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp)
221{
222 ktrace_entry_t *ktep;
223 int index;
224 int nentries;
225
226 if (ktp->kt_rollover)
227 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
228 else
229 index = 0;
230
231 ktsp->ks_start = index;
232 ktep = &(ktp->kt_entries[index]);
233
234 nentries = ktrace_nentries(ktp);
235 index++;
236 if (index < nentries) {
237 ktsp->ks_index = index;
238 } else {
239 ktsp->ks_index = 0;
240 if (index > nentries)
241 ktep = NULL;
242 }
243 return ktep;
244}
245
246/*
247 * ktrace_next()
248 *
249 * This is used to iterate through the entries of the given
250 * trace buffer. The caller must pass in the ktrace_snap_t
251 * structure initialized by ktrace_first(). The return value
252 * will be either a pointer to the next ktrace_entry or NULL
253 * if all of the entries have been traversed.
254 */
255ktrace_entry_t *
256ktrace_next(
257 ktrace_t *ktp,
258 ktrace_snap_t *ktsp)
259{
260 int index;
261 ktrace_entry_t *ktep;
262
263 index = ktsp->ks_index;
264 if (index == ktsp->ks_start) {
265 ktep = NULL;
266 } else {
267 ktep = &ktp->kt_entries[index];
268 }
269
270 index++;
271 if (index == ktrace_nentries(ktp)) {
272 ktsp->ks_index = 0;
273 } else {
274 ktsp->ks_index = index;
275 }
276
277 return ktep;
278}
279
280/*
281 * ktrace_skip()
282 *
283 * Skip the next "count" entries and return the entry after that.
284 * Return NULL if this causes us to iterate past the beginning again.
285 */
286ktrace_entry_t *
287ktrace_skip(
288 ktrace_t *ktp,
289 int count,
290 ktrace_snap_t *ktsp)
291{
292 int index;
293 int new_index;
294 ktrace_entry_t *ktep;
295 int nentries = ktrace_nentries(ktp);
296
297 index = ktsp->ks_index;
298 new_index = index + count;
299 while (new_index >= nentries) {
300 new_index -= nentries;
301 }
302 if (index == ktsp->ks_start) {
303 /*
304 * We've iterated around to the start, so we're done.
305 */
306 ktep = NULL;
307 } else if ((new_index < index) && (index < ktsp->ks_index)) {
308 /*
309 * We've skipped past the start again, so we're done.
310 */
311 ktep = NULL;
312 ktsp->ks_index = ktsp->ks_start;
313 } else {
314 ktep = &(ktp->kt_entries[new_index]);
315 new_index++;
316 if (new_index == nentries) {
317 ktsp->ks_index = 0;
318 } else {
319 ktsp->ks_index = new_index;
320 }
321 }
322 return ktep;
323}
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
deleted file mode 100644
index 741d6947ca60..000000000000
--- a/fs/xfs/support/ktrace.h
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_KTRACE_H__
19#define __XFS_SUPPORT_KTRACE_H__
20
21/*
22 * Trace buffer entry structure.
23 */
24typedef struct ktrace_entry {
25 void *val[16];
26} ktrace_entry_t;
27
28/*
29 * Trace buffer header structure.
30 */
31typedef struct ktrace {
32 int kt_nentries; /* number of entries in trace buf */
33 atomic_t kt_index; /* current index in entries */
34 unsigned int kt_index_mask;
35 int kt_rollover;
36 ktrace_entry_t *kt_entries; /* buffer of entries */
37} ktrace_t;
38
39/*
40 * Trace buffer snapshot structure.
41 */
42typedef struct ktrace_snap {
43 int ks_start; /* kt_index at time of snap */
44 int ks_index; /* current index */
45} ktrace_snap_t;
46
47
48#ifdef CONFIG_XFS_TRACE
49
50extern void ktrace_init(int zentries);
51extern void ktrace_uninit(void);
52
53extern ktrace_t *ktrace_alloc(int, unsigned int __nocast);
54extern void ktrace_free(ktrace_t *);
55
56extern void ktrace_enter(
57 ktrace_t *,
58 void *,
59 void *,
60 void *,
61 void *,
62 void *,
63 void *,
64 void *,
65 void *,
66 void *,
67 void *,
68 void *,
69 void *,
70 void *,
71 void *,
72 void *,
73 void *);
74
75extern ktrace_entry_t *ktrace_first(ktrace_t *, ktrace_snap_t *);
76extern int ktrace_nentries(ktrace_t *);
77extern ktrace_entry_t *ktrace_next(ktrace_t *, ktrace_snap_t *);
78extern ktrace_entry_t *ktrace_skip(ktrace_t *, int, ktrace_snap_t *);
79
80#else
81#define ktrace_init(x) do { } while (0)
82#define ktrace_uninit() do { } while (0)
83#endif /* CONFIG_XFS_TRACE */
84
85#endif /* __XFS_SUPPORT_KTRACE_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 17254b529c54..5ad8ad3a1dcd 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -25,21 +25,5 @@
25/* #define QUOTADEBUG 1 */ 25/* #define QUOTADEBUG 1 */
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE
29#define XFS_ALLOC_TRACE 1
30#define XFS_ATTR_TRACE 1
31#define XFS_BLI_TRACE 1
32#define XFS_BMAP_TRACE 1
33#define XFS_BTREE_TRACE 1
34#define XFS_DIR2_TRACE 1
35#define XFS_DQUOT_TRACE 1
36#define XFS_ILOCK_TRACE 1
37#define XFS_LOG_TRACE 1
38#define XFS_RW_TRACE 1
39#define XFS_BUF_TRACE 1
40#define XFS_INODE_TRACE 1
41#define XFS_FILESTREAMS_TRACE 1
42#endif
43
44#include <linux-2.6/xfs_linux.h> 28#include <linux-2.6/xfs_linux.h>
45#endif /* __XFS_H__ */ 29#endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8ed..00fd357c3e46 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
49extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
50extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
51 51
52extern struct xattr_handler xfs_xattr_system_handler; 52extern struct xattr_handler xfs_xattr_acl_access_handler;
53extern struct xattr_handler xfs_xattr_acl_default_handler;
53#else 54#else
54# define xfs_check_acl NULL 55# define xfs_check_acl NULL
55# define xfs_get_acl(inode, type) NULL 56# define xfs_get_acl(inode, type) NULL
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a5d54bf4931b..6702bd865811 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -86,6 +86,20 @@ typedef struct xfs_agf {
86#define XFS_AGF_NUM_BITS 12 86#define XFS_AGF_NUM_BITS 12
87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
88 88
89#define XFS_AGF_FLAGS \
90 { XFS_AGF_MAGICNUM, "MAGICNUM" }, \
91 { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \
92 { XFS_AGF_SEQNO, "SEQNO" }, \
93 { XFS_AGF_LENGTH, "LENGTH" }, \
94 { XFS_AGF_ROOTS, "ROOTS" }, \
95 { XFS_AGF_LEVELS, "LEVELS" }, \
96 { XFS_AGF_FLFIRST, "FLFIRST" }, \
97 { XFS_AGF_FLLAST, "FLLAST" }, \
98 { XFS_AGF_FLCOUNT, "FLCOUNT" }, \
99 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \
100 { XFS_AGF_LONGEST, "LONGEST" }, \
101 { XFS_AGF_BTREEBLKS, "BTREEBLKS" }
102
89/* disk block (xfs_daddr_t) in the AG */ 103/* disk block (xfs_daddr_t) in the AG */
90#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) 104#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
91#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) 105#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 2cf944eb796d..275b1f4f9430 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -38,6 +38,7 @@
38#include "xfs_ialloc.h" 38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 39#include "xfs_alloc.h"
40#include "xfs_error.h" 40#include "xfs_error.h"
41#include "xfs_trace.h"
41 42
42 43
43#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 44#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
@@ -51,30 +52,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
51 xfs_agblock_t bno, 52 xfs_agblock_t bno,
52 xfs_extlen_t len); 53 xfs_extlen_t len);
53 54
54#if defined(XFS_ALLOC_TRACE)
55ktrace_t *xfs_alloc_trace_buf;
56
57#define TRACE_ALLOC(s,a) \
58 xfs_alloc_trace_alloc(__func__, s, a, __LINE__)
59#define TRACE_FREE(s,a,b,x,f) \
60 xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__)
61#define TRACE_MODAGF(s,a,f) \
62 xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__)
63#define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \
64 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
65#define TRACE_UNBUSY(__func__,s,ag,sl,tp) \
66 xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
67#define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \
68 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
69#else
70#define TRACE_ALLOC(s,a)
71#define TRACE_FREE(s,a,b,x,f)
72#define TRACE_MODAGF(s,a,f)
73#define TRACE_BUSY(s,a,ag,agb,l,sl,tp)
74#define TRACE_UNBUSY(fname,s,ag,sl,tp)
75#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)
76#endif /* XFS_ALLOC_TRACE */
77
78/* 55/*
79 * Prototypes for per-ag allocation routines 56 * Prototypes for per-ag allocation routines
80 */ 57 */
@@ -498,124 +475,6 @@ xfs_alloc_read_agfl(
498 return 0; 475 return 0;
499} 476}
500 477
501#if defined(XFS_ALLOC_TRACE)
502/*
503 * Add an allocation trace entry for an alloc call.
504 */
505STATIC void
506xfs_alloc_trace_alloc(
507 const char *name, /* function tag string */
508 char *str, /* additional string */
509 xfs_alloc_arg_t *args, /* allocation argument structure */
510 int line) /* source line number */
511{
512 ktrace_enter(xfs_alloc_trace_buf,
513 (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
514 (void *)name,
515 (void *)str,
516 (void *)args->mp,
517 (void *)(__psunsigned_t)args->agno,
518 (void *)(__psunsigned_t)args->agbno,
519 (void *)(__psunsigned_t)args->minlen,
520 (void *)(__psunsigned_t)args->maxlen,
521 (void *)(__psunsigned_t)args->mod,
522 (void *)(__psunsigned_t)args->prod,
523 (void *)(__psunsigned_t)args->minleft,
524 (void *)(__psunsigned_t)args->total,
525 (void *)(__psunsigned_t)args->alignment,
526 (void *)(__psunsigned_t)args->len,
527 (void *)((((__psint_t)args->type) << 16) |
528 (__psint_t)args->otype),
529 (void *)(__psint_t)((args->wasdel << 3) |
530 (args->wasfromfl << 2) |
531 (args->isfl << 1) |
532 (args->userdata << 0)));
533}
534
535/*
536 * Add an allocation trace entry for a free call.
537 */
538STATIC void
539xfs_alloc_trace_free(
540 const char *name, /* function tag string */
541 char *str, /* additional string */
542 xfs_mount_t *mp, /* file system mount point */
543 xfs_agnumber_t agno, /* allocation group number */
544 xfs_agblock_t agbno, /* a.g. relative block number */
545 xfs_extlen_t len, /* length of extent */
546 int isfl, /* set if is freelist allocation/free */
547 int line) /* source line number */
548{
549 ktrace_enter(xfs_alloc_trace_buf,
550 (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
551 (void *)name,
552 (void *)str,
553 (void *)mp,
554 (void *)(__psunsigned_t)agno,
555 (void *)(__psunsigned_t)agbno,
556 (void *)(__psunsigned_t)len,
557 (void *)(__psint_t)isfl,
558 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
559}
560
561/*
562 * Add an allocation trace entry for modifying an agf.
563 */
564STATIC void
565xfs_alloc_trace_modagf(
566 const char *name, /* function tag string */
567 char *str, /* additional string */
568 xfs_mount_t *mp, /* file system mount point */
569 xfs_agf_t *agf, /* new agf value */
570 int flags, /* logging flags for agf */
571 int line) /* source line number */
572{
573 ktrace_enter(xfs_alloc_trace_buf,
574 (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
575 (void *)name,
576 (void *)str,
577 (void *)mp,
578 (void *)(__psint_t)flags,
579 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
580 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
581 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
582 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
583 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
584 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
585 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
586 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
587 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
588 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
589 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
590}
591
592STATIC void
593xfs_alloc_trace_busy(
594 const char *name, /* function tag string */
595 char *str, /* additional string */
596 xfs_mount_t *mp, /* file system mount point */
597 xfs_agnumber_t agno, /* allocation group number */
598 xfs_agblock_t agbno, /* a.g. relative block number */
599 xfs_extlen_t len, /* length of extent */
600 int slot, /* perag Busy slot */
601 xfs_trans_t *tp,
602 int trtype, /* type: add, delete, search */
603 int line) /* source line number */
604{
605 ktrace_enter(xfs_alloc_trace_buf,
606 (void *)(__psint_t)(trtype | (line << 16)),
607 (void *)name,
608 (void *)str,
609 (void *)mp,
610 (void *)(__psunsigned_t)agno,
611 (void *)(__psunsigned_t)agbno,
612 (void *)(__psunsigned_t)len,
613 (void *)(__psint_t)slot,
614 (void *)tp,
615 NULL, NULL, NULL, NULL, NULL, NULL, NULL);
616}
617#endif /* XFS_ALLOC_TRACE */
618
619/* 478/*
620 * Allocation group level functions. 479 * Allocation group level functions.
621 */ 480 */
@@ -665,9 +524,6 @@ xfs_alloc_ag_vextent(
665 */ 524 */
666 if (args->agbno != NULLAGBLOCK) { 525 if (args->agbno != NULLAGBLOCK) {
667 xfs_agf_t *agf; /* allocation group freelist header */ 526 xfs_agf_t *agf; /* allocation group freelist header */
668#ifdef XFS_ALLOC_TRACE
669 xfs_mount_t *mp = args->mp;
670#endif
671 long slen = (long)args->len; 527 long slen = (long)args->len;
672 528
673 ASSERT(args->len >= args->minlen && args->len <= args->maxlen); 529 ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
@@ -682,7 +538,6 @@ xfs_alloc_ag_vextent(
682 args->pag->pagf_freeblks -= args->len; 538 args->pag->pagf_freeblks -= args->len;
683 ASSERT(be32_to_cpu(agf->agf_freeblks) <= 539 ASSERT(be32_to_cpu(agf->agf_freeblks) <=
684 be32_to_cpu(agf->agf_length)); 540 be32_to_cpu(agf->agf_length));
685 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
686 xfs_alloc_log_agf(args->tp, args->agbp, 541 xfs_alloc_log_agf(args->tp, args->agbp,
687 XFS_AGF_FREEBLKS); 542 XFS_AGF_FREEBLKS);
688 /* search the busylist for these blocks */ 543 /* search the busylist for these blocks */
@@ -792,13 +647,14 @@ xfs_alloc_ag_vextent_exact(
792 } 647 }
793 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 648 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
794 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 649 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
795 TRACE_ALLOC("normal", args); 650
651 trace_xfs_alloc_exact_done(args);
796 args->wasfromfl = 0; 652 args->wasfromfl = 0;
797 return 0; 653 return 0;
798 654
799error0: 655error0:
800 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 656 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
801 TRACE_ALLOC("error", args); 657 trace_xfs_alloc_exact_error(args);
802 return error; 658 return error;
803} 659}
804 660
@@ -958,7 +814,7 @@ xfs_alloc_ag_vextent_near(
958 args->len = blen; 814 args->len = blen;
959 if (!xfs_alloc_fix_minleft(args)) { 815 if (!xfs_alloc_fix_minleft(args)) {
960 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 816 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
961 TRACE_ALLOC("nominleft", args); 817 trace_xfs_alloc_near_nominleft(args);
962 return 0; 818 return 0;
963 } 819 }
964 blen = args->len; 820 blen = args->len;
@@ -981,7 +837,8 @@ xfs_alloc_ag_vextent_near(
981 goto error0; 837 goto error0;
982 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 838 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
983 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 839 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
984 TRACE_ALLOC("first", args); 840
841 trace_xfs_alloc_near_first(args);
985 return 0; 842 return 0;
986 } 843 }
987 /* 844 /*
@@ -1272,7 +1129,7 @@ xfs_alloc_ag_vextent_near(
1272 * If we couldn't get anything, give up. 1129 * If we couldn't get anything, give up.
1273 */ 1130 */
1274 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1131 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1275 TRACE_ALLOC("neither", args); 1132 trace_xfs_alloc_size_neither(args);
1276 args->agbno = NULLAGBLOCK; 1133 args->agbno = NULLAGBLOCK;
1277 return 0; 1134 return 0;
1278 } 1135 }
@@ -1299,7 +1156,7 @@ xfs_alloc_ag_vextent_near(
1299 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1156 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1300 xfs_alloc_fix_len(args); 1157 xfs_alloc_fix_len(args);
1301 if (!xfs_alloc_fix_minleft(args)) { 1158 if (!xfs_alloc_fix_minleft(args)) {
1302 TRACE_ALLOC("nominleft", args); 1159 trace_xfs_alloc_near_nominleft(args);
1303 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1160 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1304 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1161 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1305 return 0; 1162 return 0;
@@ -1314,13 +1171,18 @@ xfs_alloc_ag_vextent_near(
1314 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1171 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1315 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1172 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1316 goto error0; 1173 goto error0;
1317 TRACE_ALLOC(j ? "gt" : "lt", args); 1174
1175 if (j)
1176 trace_xfs_alloc_near_greater(args);
1177 else
1178 trace_xfs_alloc_near_lesser(args);
1179
1318 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1180 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1319 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1181 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1320 return 0; 1182 return 0;
1321 1183
1322 error0: 1184 error0:
1323 TRACE_ALLOC("error", args); 1185 trace_xfs_alloc_near_error(args);
1324 if (cnt_cur != NULL) 1186 if (cnt_cur != NULL)
1325 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1187 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1326 if (bno_cur_lt != NULL) 1188 if (bno_cur_lt != NULL)
@@ -1371,7 +1233,7 @@ xfs_alloc_ag_vextent_size(
1371 goto error0; 1233 goto error0;
1372 if (i == 0 || flen == 0) { 1234 if (i == 0 || flen == 0) {
1373 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1235 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1374 TRACE_ALLOC("noentry", args); 1236 trace_xfs_alloc_size_noentry(args);
1375 return 0; 1237 return 0;
1376 } 1238 }
1377 ASSERT(i == 1); 1239 ASSERT(i == 1);
@@ -1448,7 +1310,7 @@ xfs_alloc_ag_vextent_size(
1448 xfs_alloc_fix_len(args); 1310 xfs_alloc_fix_len(args);
1449 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { 1311 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
1450 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1312 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1451 TRACE_ALLOC("nominleft", args); 1313 trace_xfs_alloc_size_nominleft(args);
1452 args->agbno = NULLAGBLOCK; 1314 args->agbno = NULLAGBLOCK;
1453 return 0; 1315 return 0;
1454 } 1316 }
@@ -1471,11 +1333,11 @@ xfs_alloc_ag_vextent_size(
1471 args->agbno + args->len <= 1333 args->agbno + args->len <=
1472 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1334 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1473 error0); 1335 error0);
1474 TRACE_ALLOC("normal", args); 1336 trace_xfs_alloc_size_done(args);
1475 return 0; 1337 return 0;
1476 1338
1477error0: 1339error0:
1478 TRACE_ALLOC("error", args); 1340 trace_xfs_alloc_size_error(args);
1479 if (cnt_cur) 1341 if (cnt_cur)
1480 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1342 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1481 if (bno_cur) 1343 if (bno_cur)
@@ -1534,7 +1396,7 @@ xfs_alloc_ag_vextent_small(
1534 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1396 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1535 error0); 1397 error0);
1536 args->wasfromfl = 1; 1398 args->wasfromfl = 1;
1537 TRACE_ALLOC("freelist", args); 1399 trace_xfs_alloc_small_freelist(args);
1538 *stat = 0; 1400 *stat = 0;
1539 return 0; 1401 return 0;
1540 } 1402 }
@@ -1556,17 +1418,17 @@ xfs_alloc_ag_vextent_small(
1556 */ 1418 */
1557 if (flen < args->minlen) { 1419 if (flen < args->minlen) {
1558 args->agbno = NULLAGBLOCK; 1420 args->agbno = NULLAGBLOCK;
1559 TRACE_ALLOC("notenough", args); 1421 trace_xfs_alloc_small_notenough(args);
1560 flen = 0; 1422 flen = 0;
1561 } 1423 }
1562 *fbnop = fbno; 1424 *fbnop = fbno;
1563 *flenp = flen; 1425 *flenp = flen;
1564 *stat = 1; 1426 *stat = 1;
1565 TRACE_ALLOC("normal", args); 1427 trace_xfs_alloc_small_done(args);
1566 return 0; 1428 return 0;
1567 1429
1568error0: 1430error0:
1569 TRACE_ALLOC("error", args); 1431 trace_xfs_alloc_small_error(args);
1570 return error; 1432 return error;
1571} 1433}
1572 1434
@@ -1809,17 +1671,14 @@ xfs_free_ag_extent(
1809 be32_to_cpu(agf->agf_freeblks) <= 1671 be32_to_cpu(agf->agf_freeblks) <=
1810 be32_to_cpu(agf->agf_length), 1672 be32_to_cpu(agf->agf_length),
1811 error0); 1673 error0);
1812 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
1813 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 1674 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1814 if (!isfl) 1675 if (!isfl)
1815 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1676 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1816 XFS_STATS_INC(xs_freex); 1677 XFS_STATS_INC(xs_freex);
1817 XFS_STATS_ADD(xs_freeb, len); 1678 XFS_STATS_ADD(xs_freeb, len);
1818 } 1679 }
1819 TRACE_FREE(haveleft ? 1680
1820 (haveright ? "both" : "left") : 1681 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1821 (haveright ? "right" : "none"),
1822 agno, bno, len, isfl);
1823 1682
1824 /* 1683 /*
1825 * Since blocks move to the free list without the coordination 1684 * Since blocks move to the free list without the coordination
@@ -1836,7 +1695,7 @@ xfs_free_ag_extent(
1836 return 0; 1695 return 0;
1837 1696
1838 error0: 1697 error0:
1839 TRACE_FREE("error", agno, bno, len, isfl); 1698 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1840 if (bno_cur) 1699 if (bno_cur)
1841 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1700 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1842 if (cnt_cur) 1701 if (cnt_cur)
@@ -2122,7 +1981,6 @@ xfs_alloc_get_freelist(
2122 logflags |= XFS_AGF_BTREEBLKS; 1981 logflags |= XFS_AGF_BTREEBLKS;
2123 } 1982 }
2124 1983
2125 TRACE_MODAGF(NULL, agf, logflags);
2126 xfs_alloc_log_agf(tp, agbp, logflags); 1984 xfs_alloc_log_agf(tp, agbp, logflags);
2127 *bnop = bno; 1985 *bnop = bno;
2128 1986
@@ -2165,6 +2023,8 @@ xfs_alloc_log_agf(
2165 sizeof(xfs_agf_t) 2023 sizeof(xfs_agf_t)
2166 }; 2024 };
2167 2025
2026 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2027
2168 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2028 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2169 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2029 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2170} 2030}
@@ -2230,13 +2090,11 @@ xfs_alloc_put_freelist(
2230 logflags |= XFS_AGF_BTREEBLKS; 2090 logflags |= XFS_AGF_BTREEBLKS;
2231 } 2091 }
2232 2092
2233 TRACE_MODAGF(NULL, agf, logflags);
2234 xfs_alloc_log_agf(tp, agbp, logflags); 2093 xfs_alloc_log_agf(tp, agbp, logflags);
2235 2094
2236 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2095 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2237 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2096 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2238 *blockp = cpu_to_be32(bno); 2097 *blockp = cpu_to_be32(bno);
2239 TRACE_MODAGF(NULL, agf, logflags);
2240 xfs_alloc_log_agf(tp, agbp, logflags); 2098 xfs_alloc_log_agf(tp, agbp, logflags);
2241 xfs_trans_log_buf(tp, agflbp, 2099 xfs_trans_log_buf(tp, agflbp,
2242 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), 2100 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
@@ -2399,7 +2257,7 @@ xfs_alloc_vextent(
2399 args->minlen > args->maxlen || args->minlen > agsize || 2257 args->minlen > args->maxlen || args->minlen > agsize ||
2400 args->mod >= args->prod) { 2258 args->mod >= args->prod) {
2401 args->fsbno = NULLFSBLOCK; 2259 args->fsbno = NULLFSBLOCK;
2402 TRACE_ALLOC("badargs", args); 2260 trace_xfs_alloc_vextent_badargs(args);
2403 return 0; 2261 return 0;
2404 } 2262 }
2405 minleft = args->minleft; 2263 minleft = args->minleft;
@@ -2418,12 +2276,12 @@ xfs_alloc_vextent(
2418 error = xfs_alloc_fix_freelist(args, 0); 2276 error = xfs_alloc_fix_freelist(args, 0);
2419 args->minleft = minleft; 2277 args->minleft = minleft;
2420 if (error) { 2278 if (error) {
2421 TRACE_ALLOC("nofix", args); 2279 trace_xfs_alloc_vextent_nofix(args);
2422 goto error0; 2280 goto error0;
2423 } 2281 }
2424 if (!args->agbp) { 2282 if (!args->agbp) {
2425 up_read(&mp->m_peraglock); 2283 up_read(&mp->m_peraglock);
2426 TRACE_ALLOC("noagbp", args); 2284 trace_xfs_alloc_vextent_noagbp(args);
2427 break; 2285 break;
2428 } 2286 }
2429 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2287 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
@@ -2488,7 +2346,7 @@ xfs_alloc_vextent(
2488 error = xfs_alloc_fix_freelist(args, flags); 2346 error = xfs_alloc_fix_freelist(args, flags);
2489 args->minleft = minleft; 2347 args->minleft = minleft;
2490 if (error) { 2348 if (error) {
2491 TRACE_ALLOC("nofix", args); 2349 trace_xfs_alloc_vextent_nofix(args);
2492 goto error0; 2350 goto error0;
2493 } 2351 }
2494 /* 2352 /*
@@ -2499,7 +2357,9 @@ xfs_alloc_vextent(
2499 goto error0; 2357 goto error0;
2500 break; 2358 break;
2501 } 2359 }
2502 TRACE_ALLOC("loopfailed", args); 2360
2361 trace_xfs_alloc_vextent_loopfailed(args);
2362
2503 /* 2363 /*
2504 * Didn't work, figure out the next iteration. 2364 * Didn't work, figure out the next iteration.
2505 */ 2365 */
@@ -2526,7 +2386,7 @@ xfs_alloc_vextent(
2526 if (args->agno == sagno) { 2386 if (args->agno == sagno) {
2527 if (no_min == 1) { 2387 if (no_min == 1) {
2528 args->agbno = NULLAGBLOCK; 2388 args->agbno = NULLAGBLOCK;
2529 TRACE_ALLOC("allfailed", args); 2389 trace_xfs_alloc_vextent_allfailed(args);
2530 break; 2390 break;
2531 } 2391 }
2532 if (flags == 0) { 2392 if (flags == 0) {
@@ -2642,16 +2502,16 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2642 } 2502 }
2643 } 2503 }
2644 2504
2505 trace_xfs_alloc_busy(mp, agno, bno, len, n);
2506
2645 if (n < XFS_PAGB_NUM_SLOTS) { 2507 if (n < XFS_PAGB_NUM_SLOTS) {
2646 bsy = &mp->m_perag[agno].pagb_list[n]; 2508 bsy = &mp->m_perag[agno].pagb_list[n];
2647 mp->m_perag[agno].pagb_count++; 2509 mp->m_perag[agno].pagb_count++;
2648 TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
2649 bsy->busy_start = bno; 2510 bsy->busy_start = bno;
2650 bsy->busy_length = len; 2511 bsy->busy_length = len;
2651 bsy->busy_tp = tp; 2512 bsy->busy_tp = tp;
2652 xfs_trans_add_busy(tp, agno, n); 2513 xfs_trans_add_busy(tp, agno, n);
2653 } else { 2514 } else {
2654 TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
2655 /* 2515 /*
2656 * The busy list is full! Since it is now not possible to 2516 * The busy list is full! Since it is now not possible to
2657 * track the free block, make this a synchronous transaction 2517 * track the free block, make this a synchronous transaction
@@ -2678,12 +2538,12 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
2678 list = mp->m_perag[agno].pagb_list; 2538 list = mp->m_perag[agno].pagb_list;
2679 2539
2680 ASSERT(idx < XFS_PAGB_NUM_SLOTS); 2540 ASSERT(idx < XFS_PAGB_NUM_SLOTS);
2541
2542 trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp);
2543
2681 if (list[idx].busy_tp == tp) { 2544 if (list[idx].busy_tp == tp) {
2682 TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
2683 list[idx].busy_tp = NULL; 2545 list[idx].busy_tp = NULL;
2684 mp->m_perag[agno].pagb_count--; 2546 mp->m_perag[agno].pagb_count--;
2685 } else {
2686 TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
2687 } 2547 }
2688 2548
2689 spin_unlock(&mp->m_perag[agno].pagb_lock); 2549 spin_unlock(&mp->m_perag[agno].pagb_lock);
@@ -2703,45 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2703 xfs_mount_t *mp; 2563 xfs_mount_t *mp;
2704 xfs_perag_busy_t *bsy; 2564 xfs_perag_busy_t *bsy;
2705 xfs_agblock_t uend, bend; 2565 xfs_agblock_t uend, bend;
2706 xfs_lsn_t lsn; 2566 xfs_lsn_t lsn = 0;
2707 int cnt; 2567 int cnt;
2708 2568
2709 mp = tp->t_mountp; 2569 mp = tp->t_mountp;
2710 2570
2711 spin_lock(&mp->m_perag[agno].pagb_lock); 2571 spin_lock(&mp->m_perag[agno].pagb_lock);
2712 cnt = mp->m_perag[agno].pagb_count;
2713 2572
2714 uend = bno + len - 1; 2573 uend = bno + len - 1;
2715 2574
2716 /* search pagb_list for this slot, skipping open slots */ 2575 /*
2717 for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { 2576 * search pagb_list for this slot, skipping open slots. We have to
2577 * search the entire array as there may be multiple overlaps and
2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range.
2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
2582 bsy = &mp->m_perag[agno].pagb_list[cnt];
2583 if (!bsy->busy_tp)
2584 continue;
2718 2585
2719 /* 2586 bend = bsy->busy_start + bsy->busy_length - 1;
2720 * (start1,length1) within (start2, length2) 2587 if (bno > bend || uend < bsy->busy_start)
2721 */ 2588 continue;
2722 if (bsy->busy_tp != NULL) { 2589
2723 bend = bsy->busy_start + bsy->busy_length - 1; 2590 /* (start1,length1) within (start2, length2) */
2724 if ((bno > bend) || (uend < bsy->busy_start)) { 2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2725 cnt--; 2592 lsn = bsy->busy_tp->t_commit_lsn;
2726 } else {
2727 TRACE_BUSYSEARCH("xfs_alloc_search_busy",
2728 "found1", agno, bno, len, tp);
2729 break;
2730 }
2731 }
2732 } 2593 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2733 2596
2734 /* 2597 /*
2735 * If a block was found, force the log through the LSN of the 2598 * If a block was found, force the log through the LSN of the
2736 * transaction that freed the block 2599 * transaction that freed the block
2737 */ 2600 */
2738 if (cnt) { 2601 if (lsn)
2739 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp);
2740 lsn = bsy->busy_tp->t_commit_lsn;
2741 spin_unlock(&mp->m_perag[agno].pagb_lock);
2742 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2743 } else {
2744 TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp);
2745 spin_unlock(&mp->m_perag[agno].pagb_lock);
2746 }
2747} 2603}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index e704caee10df..599bffa39784 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -37,6 +37,15 @@ typedef enum xfs_alloctype
37 XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ 37 XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */
38} xfs_alloctype_t; 38} xfs_alloctype_t;
39 39
40#define XFS_ALLOC_TYPES \
41 { XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
42 { XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \
43 { XFS_ALLOCTYPE_START_AG, "START_AG" }, \
44 { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \
45 { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \
46 { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \
47 { XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" }
48
40/* 49/*
41 * Flags for xfs_alloc_fix_freelist. 50 * Flags for xfs_alloc_fix_freelist.
42 */ 51 */
@@ -109,24 +118,6 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp,
109 118
110#ifdef __KERNEL__ 119#ifdef __KERNEL__
111 120
112#if defined(XFS_ALLOC_TRACE)
113/*
114 * Allocation tracing buffer size.
115 */
116#define XFS_ALLOC_TRACE_SIZE 4096
117extern ktrace_t *xfs_alloc_trace_buf;
118
119/*
120 * Types for alloc tracing.
121 */
122#define XFS_ALLOC_KTRACE_ALLOC 1
123#define XFS_ALLOC_KTRACE_FREE 2
124#define XFS_ALLOC_KTRACE_MODAGF 3
125#define XFS_ALLOC_KTRACE_BUSY 4
126#define XFS_ALLOC_KTRACE_UNBUSY 5
127#define XFS_ALLOC_KTRACE_BUSYSEARCH 6
128#endif
129
130void 121void
131xfs_alloc_mark_busy(xfs_trans_t *tp, 122xfs_alloc_mark_busy(xfs_trans_t *tp,
132 xfs_agnumber_t agno, 123 xfs_agnumber_t agno,
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index c10c3a292d30..adbd9141aea1 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -39,6 +39,7 @@
39#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
40#include "xfs_alloc.h" 40#include "xfs_alloc.h"
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
42 43
43 44
44STATIC struct xfs_btree_cur * 45STATIC struct xfs_btree_cur *
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 8fe6f6b78a4a..e953b6cfb2a8 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_rw.h" 48#include "xfs_rw.h"
49#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
50#include "xfs_trace.h"
50 51
51/* 52/*
52 * xfs_attr.c 53 * xfs_attr.c
@@ -89,10 +90,6 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
89 90
90#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 91#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
91 92
92#if defined(XFS_ATTR_TRACE)
93ktrace_t *xfs_attr_trace_buf;
94#endif
95
96STATIC int 93STATIC int
97xfs_attr_name_to_xname( 94xfs_attr_name_to_xname(
98 struct xfs_name *xname, 95 struct xfs_name *xname,
@@ -640,7 +637,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
640 return EIO; 637 return EIO;
641 638
642 xfs_ilock(dp, XFS_ILOCK_SHARED); 639 xfs_ilock(dp, XFS_ILOCK_SHARED);
643 xfs_attr_trace_l_c("syscall start", context);
644 640
645 /* 641 /*
646 * Decide on what work routines to call based on the inode size. 642 * Decide on what work routines to call based on the inode size.
@@ -656,7 +652,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
656 } 652 }
657 653
658 xfs_iunlock(dp, XFS_ILOCK_SHARED); 654 xfs_iunlock(dp, XFS_ILOCK_SHARED);
659 xfs_attr_trace_l_c("syscall end", context);
660 655
661 return error; 656 return error;
662} 657}
@@ -702,7 +697,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
702 context->count * sizeof(alist->al_offset[0]); 697 context->count * sizeof(alist->al_offset[0]);
703 context->firstu -= ATTR_ENTSIZE(namelen); 698 context->firstu -= ATTR_ENTSIZE(namelen);
704 if (context->firstu < arraytop) { 699 if (context->firstu < arraytop) {
705 xfs_attr_trace_l_c("buffer full", context); 700 trace_xfs_attr_list_full(context);
706 alist->al_more = 1; 701 alist->al_more = 1;
707 context->seen_enough = 1; 702 context->seen_enough = 1;
708 return 1; 703 return 1;
@@ -714,7 +709,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
714 aep->a_name[namelen] = 0; 709 aep->a_name[namelen] = 0;
715 alist->al_offset[context->count++] = context->firstu; 710 alist->al_offset[context->count++] = context->firstu;
716 alist->al_count = context->count; 711 alist->al_count = context->count;
717 xfs_attr_trace_l_c("add", context); 712 trace_xfs_attr_list_add(context);
718 return 0; 713 return 0;
719} 714}
720 715
@@ -1853,7 +1848,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1853 node = bp->data; 1848 node = bp->data;
1854 switch (be16_to_cpu(node->hdr.info.magic)) { 1849 switch (be16_to_cpu(node->hdr.info.magic)) {
1855 case XFS_DA_NODE_MAGIC: 1850 case XFS_DA_NODE_MAGIC:
1856 xfs_attr_trace_l_cn("wrong blk", context, node); 1851 trace_xfs_attr_list_wrong_blk(context);
1857 xfs_da_brelse(NULL, bp); 1852 xfs_da_brelse(NULL, bp);
1858 bp = NULL; 1853 bp = NULL;
1859 break; 1854 break;
@@ -1861,20 +1856,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1861 leaf = bp->data; 1856 leaf = bp->data;
1862 if (cursor->hashval > be32_to_cpu(leaf->entries[ 1857 if (cursor->hashval > be32_to_cpu(leaf->entries[
1863 be16_to_cpu(leaf->hdr.count)-1].hashval)) { 1858 be16_to_cpu(leaf->hdr.count)-1].hashval)) {
1864 xfs_attr_trace_l_cl("wrong blk", 1859 trace_xfs_attr_list_wrong_blk(context);
1865 context, leaf);
1866 xfs_da_brelse(NULL, bp); 1860 xfs_da_brelse(NULL, bp);
1867 bp = NULL; 1861 bp = NULL;
1868 } else if (cursor->hashval <= 1862 } else if (cursor->hashval <=
1869 be32_to_cpu(leaf->entries[0].hashval)) { 1863 be32_to_cpu(leaf->entries[0].hashval)) {
1870 xfs_attr_trace_l_cl("maybe wrong blk", 1864 trace_xfs_attr_list_wrong_blk(context);
1871 context, leaf);
1872 xfs_da_brelse(NULL, bp); 1865 xfs_da_brelse(NULL, bp);
1873 bp = NULL; 1866 bp = NULL;
1874 } 1867 }
1875 break; 1868 break;
1876 default: 1869 default:
1877 xfs_attr_trace_l_c("wrong blk - ??", context); 1870 trace_xfs_attr_list_wrong_blk(context);
1878 xfs_da_brelse(NULL, bp); 1871 xfs_da_brelse(NULL, bp);
1879 bp = NULL; 1872 bp = NULL;
1880 } 1873 }
@@ -1919,8 +1912,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1919 if (cursor->hashval 1912 if (cursor->hashval
1920 <= be32_to_cpu(btree->hashval)) { 1913 <= be32_to_cpu(btree->hashval)) {
1921 cursor->blkno = be32_to_cpu(btree->before); 1914 cursor->blkno = be32_to_cpu(btree->before);
1922 xfs_attr_trace_l_cb("descending", 1915 trace_xfs_attr_list_node_descend(context,
1923 context, btree); 1916 btree);
1924 break; 1917 break;
1925 } 1918 }
1926 } 1919 }
@@ -2270,85 +2263,3 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2270 } 2263 }
2271 return(0); 2264 return(0);
2272} 2265}
2273
2274#if defined(XFS_ATTR_TRACE)
2275/*
2276 * Add a trace buffer entry for an attr_list context structure.
2277 */
2278void
2279xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2280{
2281 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
2282 (__psunsigned_t)NULL,
2283 (__psunsigned_t)NULL,
2284 (__psunsigned_t)NULL);
2285}
2286
2287/*
2288 * Add a trace buffer entry for a context structure and a Btree node.
2289 */
2290void
2291xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2292 struct xfs_da_intnode *node)
2293{
2294 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
2295 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2296 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2297 (__psunsigned_t)be32_to_cpu(node->btree[
2298 be16_to_cpu(node->hdr.count)-1].hashval));
2299}
2300
2301/*
2302 * Add a trace buffer entry for a context structure and a Btree element.
2303 */
2304void
2305xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2306 struct xfs_da_node_entry *btree)
2307{
2308 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
2309 (__psunsigned_t)be32_to_cpu(btree->hashval),
2310 (__psunsigned_t)be32_to_cpu(btree->before),
2311 (__psunsigned_t)NULL);
2312}
2313
2314/*
2315 * Add a trace buffer entry for a context structure and a leaf block.
2316 */
2317void
2318xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2319 struct xfs_attr_leafblock *leaf)
2320{
2321 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
2322 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2323 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2324 (__psunsigned_t)be32_to_cpu(leaf->entries[
2325 be16_to_cpu(leaf->hdr.count)-1].hashval));
2326}
2327
2328/*
2329 * Add a trace buffer entry for the arguments given to the routine,
2330 * generic form.
2331 */
2332void
2333xfs_attr_trace_enter(int type, char *where,
2334 struct xfs_attr_list_context *context,
2335 __psunsigned_t a13, __psunsigned_t a14,
2336 __psunsigned_t a15)
2337{
2338 ASSERT(xfs_attr_trace_buf);
2339 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2340 (void *)((__psunsigned_t)where),
2341 (void *)((__psunsigned_t)context->dp),
2342 (void *)((__psunsigned_t)context->cursor->hashval),
2343 (void *)((__psunsigned_t)context->cursor->blkno),
2344 (void *)((__psunsigned_t)context->cursor->offset),
2345 (void *)((__psunsigned_t)context->alist),
2346 (void *)((__psunsigned_t)context->bufsize),
2347 (void *)((__psunsigned_t)context->count),
2348 (void *)((__psunsigned_t)context->firstu),
2349 NULL,
2350 (void *)((__psunsigned_t)context->dupcnt),
2351 (void *)((__psunsigned_t)context->flags),
2352 (void *)a13, (void *)a14, (void *)a15);
2353}
2354#endif /* XFS_ATTR_TRACE */
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 12f0be3a73d4..59b410ce69a1 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -48,6 +48,16 @@ struct xfs_attr_list_context;
48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ 48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ 49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
50 50
51#define XFS_ATTR_FLAGS \
52 { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \
53 { ATTR_ROOT, "ROOT" }, \
54 { ATTR_TRUST, "TRUST" }, \
55 { ATTR_SECURE, "SECURE" }, \
56 { ATTR_CREATE, "CREATE" }, \
57 { ATTR_REPLACE, "REPLACE" }, \
58 { ATTR_KERNOTIME, "KERNOTIME" }, \
59 { ATTR_KERNOVAL, "KERNOVAL" }
60
51/* 61/*
52 * The maximum size (into the kernel or returned from the kernel) of an 62 * The maximum size (into the kernel or returned from the kernel) of an
53 * attribute value or the buffer used for an attr_list() call. Larger 63 * attribute value or the buffer used for an attr_list() call. Larger
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 0b687351293f..baf41b5af756 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -42,6 +42,7 @@
42#include "xfs_attr.h" 42#include "xfs_attr.h"
43#include "xfs_attr_leaf.h" 43#include "xfs_attr_leaf.h"
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_trace.h"
45 46
46/* 47/*
47 * xfs_attr_leaf.c 48 * xfs_attr_leaf.c
@@ -594,7 +595,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
594 cursor = context->cursor; 595 cursor = context->cursor;
595 ASSERT(cursor != NULL); 596 ASSERT(cursor != NULL);
596 597
597 xfs_attr_trace_l_c("sf start", context); 598 trace_xfs_attr_list_sf(context);
598 599
599 /* 600 /*
600 * If the buffer is large enough and the cursor is at the start, 601 * If the buffer is large enough and the cursor is at the start,
@@ -627,7 +628,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
627 return error; 628 return error;
628 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 629 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
629 } 630 }
630 xfs_attr_trace_l_c("sf big-gulp", context); 631 trace_xfs_attr_list_sf_all(context);
631 return(0); 632 return(0);
632 } 633 }
633 634
@@ -653,7 +654,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
653 XFS_CORRUPTION_ERROR("xfs_attr_shortform_list", 654 XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
654 XFS_ERRLEVEL_LOW, 655 XFS_ERRLEVEL_LOW,
655 context->dp->i_mount, sfe); 656 context->dp->i_mount, sfe);
656 xfs_attr_trace_l_c("sf corrupted", context);
657 kmem_free(sbuf); 657 kmem_free(sbuf);
658 return XFS_ERROR(EFSCORRUPTED); 658 return XFS_ERROR(EFSCORRUPTED);
659 } 659 }
@@ -693,7 +693,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
693 } 693 }
694 if (i == nsbuf) { 694 if (i == nsbuf) {
695 kmem_free(sbuf); 695 kmem_free(sbuf);
696 xfs_attr_trace_l_c("blk end", context);
697 return(0); 696 return(0);
698 } 697 }
699 698
@@ -719,7 +718,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
719 } 718 }
720 719
721 kmem_free(sbuf); 720 kmem_free(sbuf);
722 xfs_attr_trace_l_c("sf E-O-F", context);
723 return(0); 721 return(0);
724} 722}
725 723
@@ -2323,7 +2321,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2323 cursor = context->cursor; 2321 cursor = context->cursor;
2324 cursor->initted = 1; 2322 cursor->initted = 1;
2325 2323
2326 xfs_attr_trace_l_cl("blk start", context, leaf); 2324 trace_xfs_attr_list_leaf(context);
2327 2325
2328 /* 2326 /*
2329 * Re-find our place in the leaf block if this is a new syscall. 2327 * Re-find our place in the leaf block if this is a new syscall.
@@ -2344,7 +2342,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2344 } 2342 }
2345 } 2343 }
2346 if (i == be16_to_cpu(leaf->hdr.count)) { 2344 if (i == be16_to_cpu(leaf->hdr.count)) {
2347 xfs_attr_trace_l_c("not found", context); 2345 trace_xfs_attr_list_notfound(context);
2348 return(0); 2346 return(0);
2349 } 2347 }
2350 } else { 2348 } else {
@@ -2419,7 +2417,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2419 break; 2417 break;
2420 cursor->offset++; 2418 cursor->offset++;
2421 } 2419 }
2422 xfs_attr_trace_l_cl("blk end", context, leaf); 2420 trace_xfs_attr_list_leaf_end(context);
2423 return(retval); 2421 return(retval);
2424} 2422}
2425 2423
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index ea22839caed2..76ab7b0cbb3a 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -25,8 +25,6 @@
25 * to fit into the literal area of the inode. 25 * to fit into the literal area of the inode.
26 */ 26 */
27 27
28struct xfs_inode;
29
30/* 28/*
31 * Entries are packed toward the top as tight as possible. 29 * Entries are packed toward the top as tight as possible.
32 */ 30 */
@@ -69,42 +67,4 @@ typedef struct xfs_attr_sf_sort {
69 (be16_to_cpu(((xfs_attr_shortform_t *) \ 67 (be16_to_cpu(((xfs_attr_shortform_t *) \
70 ((dp)->i_afp->if_u1.if_data))->hdr.totsize)) 68 ((dp)->i_afp->if_u1.if_data))->hdr.totsize))
71 69
72#if defined(XFS_ATTR_TRACE)
73/*
74 * Kernel tracing support for attribute lists
75 */
76struct xfs_attr_list_context;
77struct xfs_da_intnode;
78struct xfs_da_node_entry;
79struct xfs_attr_leafblock;
80
81#define XFS_ATTR_TRACE_SIZE 4096 /* size of global trace buffer */
82extern ktrace_t *xfs_attr_trace_buf;
83
84/*
85 * Trace record types.
86 */
87#define XFS_ATTR_KTRACE_L_C 1 /* context */
88#define XFS_ATTR_KTRACE_L_CN 2 /* context, node */
89#define XFS_ATTR_KTRACE_L_CB 3 /* context, btree */
90#define XFS_ATTR_KTRACE_L_CL 4 /* context, leaf */
91
92void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context);
93void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
94 struct xfs_da_intnode *node);
95void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
96 struct xfs_da_node_entry *btree);
97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
98 struct xfs_attr_leafblock *leaf);
99void xfs_attr_trace_enter(int type, char *where,
100 struct xfs_attr_list_context *context,
101 __psunsigned_t a13, __psunsigned_t a14,
102 __psunsigned_t a15);
103#else
104#define xfs_attr_trace_l_c(w,c)
105#define xfs_attr_trace_l_cn(w,c,n)
106#define xfs_attr_trace_l_cb(w,c,b)
107#define xfs_attr_trace_l_cl(w,c,l)
108#endif /* XFS_ATTR_TRACE */
109
110#endif /* __XFS_ATTR_SF_H__ */ 70#endif /* __XFS_ATTR_SF_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8971fb09d387..98251cdc52aa 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -54,6 +54,7 @@
54#include "xfs_buf_item.h" 54#include "xfs_buf_item.h"
55#include "xfs_filestream.h" 55#include "xfs_filestream.h"
56#include "xfs_vnodeops.h" 56#include "xfs_vnodeops.h"
57#include "xfs_trace.h"
57 58
58 59
59#ifdef DEBUG 60#ifdef DEBUG
@@ -272,71 +273,6 @@ xfs_bmap_isaeof(
272 int whichfork, /* data or attribute fork */ 273 int whichfork, /* data or attribute fork */
273 char *aeof); /* return value */ 274 char *aeof); /* return value */
274 275
275#ifdef XFS_BMAP_TRACE
276/*
277 * Add bmap trace entry prior to a call to xfs_iext_remove.
278 */
279STATIC void
280xfs_bmap_trace_delete(
281 const char *fname, /* function name */
282 char *desc, /* operation description */
283 xfs_inode_t *ip, /* incore inode pointer */
284 xfs_extnum_t idx, /* index of entry(entries) deleted */
285 xfs_extnum_t cnt, /* count of entries deleted, 1 or 2 */
286 int whichfork); /* data or attr fork */
287
288/*
289 * Add bmap trace entry prior to a call to xfs_iext_insert, or
290 * reading in the extents list from the disk (in the btree).
291 */
292STATIC void
293xfs_bmap_trace_insert(
294 const char *fname, /* function name */
295 char *desc, /* operation description */
296 xfs_inode_t *ip, /* incore inode pointer */
297 xfs_extnum_t idx, /* index of entry(entries) inserted */
298 xfs_extnum_t cnt, /* count of entries inserted, 1 or 2 */
299 xfs_bmbt_irec_t *r1, /* inserted record 1 */
300 xfs_bmbt_irec_t *r2, /* inserted record 2 or null */
301 int whichfork); /* data or attr fork */
302
303/*
304 * Add bmap trace entry after updating an extent record in place.
305 */
306STATIC void
307xfs_bmap_trace_post_update(
308 const char *fname, /* function name */
309 char *desc, /* operation description */
310 xfs_inode_t *ip, /* incore inode pointer */
311 xfs_extnum_t idx, /* index of entry updated */
312 int whichfork); /* data or attr fork */
313
314/*
315 * Add bmap trace entry prior to updating an extent record in place.
316 */
317STATIC void
318xfs_bmap_trace_pre_update(
319 const char *fname, /* function name */
320 char *desc, /* operation description */
321 xfs_inode_t *ip, /* incore inode pointer */
322 xfs_extnum_t idx, /* index of entry to be updated */
323 int whichfork); /* data or attr fork */
324
325#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \
326 xfs_bmap_trace_delete(__func__,d,ip,i,c,w)
327#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \
328 xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w)
329#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \
330 xfs_bmap_trace_post_update(__func__,d,ip,i,w)
331#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \
332 xfs_bmap_trace_pre_update(__func__,d,ip,i,w)
333#else
334#define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
335#define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
336#define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
337#define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
338#endif /* XFS_BMAP_TRACE */
339
340/* 276/*
341 * Compute the worst-case number of indirect blocks that will be used 277 * Compute the worst-case number of indirect blocks that will be used
342 * for ip's delayed extent of length "len". 278 * for ip's delayed extent of length "len".
@@ -363,18 +299,6 @@ xfs_bmap_validate_ret(
363#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) 299#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
364#endif /* DEBUG */ 300#endif /* DEBUG */
365 301
366#if defined(XFS_RW_TRACE)
367STATIC void
368xfs_bunmap_trace(
369 xfs_inode_t *ip,
370 xfs_fileoff_t bno,
371 xfs_filblks_t len,
372 int flags,
373 inst_t *ra);
374#else
375#define xfs_bunmap_trace(ip, bno, len, flags, ra)
376#endif /* XFS_RW_TRACE */
377
378STATIC int 302STATIC int
379xfs_bmap_count_tree( 303xfs_bmap_count_tree(
380 xfs_mount_t *mp, 304 xfs_mount_t *mp,
@@ -590,9 +514,9 @@ xfs_bmap_add_extent(
590 * already extents in the list. 514 * already extents in the list.
591 */ 515 */
592 if (nextents == 0) { 516 if (nextents == 0) {
593 XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL, 517 xfs_iext_insert(ip, 0, 1, new,
594 whichfork); 518 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
595 xfs_iext_insert(ifp, 0, 1, new); 519
596 ASSERT(cur == NULL); 520 ASSERT(cur == NULL);
597 ifp->if_lastex = 0; 521 ifp->if_lastex = 0;
598 if (!isnullstartblock(new->br_startblock)) { 522 if (!isnullstartblock(new->br_startblock)) {
@@ -759,26 +683,10 @@ xfs_bmap_add_extent_delay_real(
759 xfs_filblks_t temp=0; /* value for dnew calculations */ 683 xfs_filblks_t temp=0; /* value for dnew calculations */
760 xfs_filblks_t temp2=0;/* value for dnew calculations */ 684 xfs_filblks_t temp2=0;/* value for dnew calculations */
761 int tmp_rval; /* partial logging flags */ 685 int tmp_rval; /* partial logging flags */
762 enum { /* bit number definitions for state */
763 LEFT_CONTIG, RIGHT_CONTIG,
764 LEFT_FILLING, RIGHT_FILLING,
765 LEFT_DELAY, RIGHT_DELAY,
766 LEFT_VALID, RIGHT_VALID
767 };
768 686
769#define LEFT r[0] 687#define LEFT r[0]
770#define RIGHT r[1] 688#define RIGHT r[1]
771#define PREV r[2] 689#define PREV r[2]
772#define MASK(b) (1 << (b))
773#define MASK2(a,b) (MASK(a) | MASK(b))
774#define MASK3(a,b,c) (MASK2(a,b) | MASK(c))
775#define MASK4(a,b,c,d) (MASK3(a,b,c) | MASK(d))
776#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
777#define STATE_TEST(b) (state & MASK(b))
778#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
779 ((state &= ~MASK(b)), 0))
780#define SWITCH_STATE \
781 (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
782 690
783 /* 691 /*
784 * Set up a bunch of variables to make the tests simpler. 692 * Set up a bunch of variables to make the tests simpler.
@@ -790,69 +698,80 @@ xfs_bmap_add_extent_delay_real(
790 new_endoff = new->br_startoff + new->br_blockcount; 698 new_endoff = new->br_startoff + new->br_blockcount;
791 ASSERT(PREV.br_startoff <= new->br_startoff); 699 ASSERT(PREV.br_startoff <= new->br_startoff);
792 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 700 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
701
793 /* 702 /*
794 * Set flags determining what part of the previous delayed allocation 703 * Set flags determining what part of the previous delayed allocation
795 * extent is being replaced by a real allocation. 704 * extent is being replaced by a real allocation.
796 */ 705 */
797 STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff); 706 if (PREV.br_startoff == new->br_startoff)
798 STATE_SET(RIGHT_FILLING, 707 state |= BMAP_LEFT_FILLING;
799 PREV.br_startoff + PREV.br_blockcount == new_endoff); 708 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
709 state |= BMAP_RIGHT_FILLING;
710
800 /* 711 /*
801 * Check and set flags if this segment has a left neighbor. 712 * Check and set flags if this segment has a left neighbor.
802 * Don't set contiguous if the combined extent would be too large. 713 * Don't set contiguous if the combined extent would be too large.
803 */ 714 */
804 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 715 if (idx > 0) {
716 state |= BMAP_LEFT_VALID;
805 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); 717 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
806 STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock)); 718
719 if (isnullstartblock(LEFT.br_startblock))
720 state |= BMAP_LEFT_DELAY;
807 } 721 }
808 STATE_SET(LEFT_CONTIG, 722
809 STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && 723 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
810 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 724 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
811 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 725 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
812 LEFT.br_state == new->br_state && 726 LEFT.br_state == new->br_state &&
813 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN); 727 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
728 state |= BMAP_LEFT_CONTIG;
729
814 /* 730 /*
815 * Check and set flags if this segment has a right neighbor. 731 * Check and set flags if this segment has a right neighbor.
816 * Don't set contiguous if the combined extent would be too large. 732 * Don't set contiguous if the combined extent would be too large.
817 * Also check for all-three-contiguous being too large. 733 * Also check for all-three-contiguous being too large.
818 */ 734 */
819 if (STATE_SET_TEST(RIGHT_VALID, 735 if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
820 idx < 736 state |= BMAP_RIGHT_VALID;
821 ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
822 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); 737 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
823 STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock)); 738
739 if (isnullstartblock(RIGHT.br_startblock))
740 state |= BMAP_RIGHT_DELAY;
824 } 741 }
825 STATE_SET(RIGHT_CONTIG, 742
826 STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && 743 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
827 new_endoff == RIGHT.br_startoff && 744 new_endoff == RIGHT.br_startoff &&
828 new->br_startblock + new->br_blockcount == 745 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
829 RIGHT.br_startblock && 746 new->br_state == RIGHT.br_state &&
830 new->br_state == RIGHT.br_state && 747 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
831 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 748 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
832 ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) != 749 BMAP_RIGHT_FILLING)) !=
833 MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) || 750 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
834 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 751 BMAP_RIGHT_FILLING) ||
835 <= MAXEXTLEN)); 752 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
753 <= MAXEXTLEN))
754 state |= BMAP_RIGHT_CONTIG;
755
836 error = 0; 756 error = 0;
837 /* 757 /*
838 * Switch out based on the FILLING and CONTIG state bits. 758 * Switch out based on the FILLING and CONTIG state bits.
839 */ 759 */
840 switch (SWITCH_STATE) { 760 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
841 761 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
842 case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 762 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
763 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
843 /* 764 /*
844 * Filling in all of a previously delayed allocation extent. 765 * Filling in all of a previously delayed allocation extent.
845 * The left and right neighbors are both contiguous with new. 766 * The left and right neighbors are both contiguous with new.
846 */ 767 */
847 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, 768 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
848 XFS_DATA_FORK);
849 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 769 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
850 LEFT.br_blockcount + PREV.br_blockcount + 770 LEFT.br_blockcount + PREV.br_blockcount +
851 RIGHT.br_blockcount); 771 RIGHT.br_blockcount);
852 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, 772 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
853 XFS_DATA_FORK); 773
854 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); 774 xfs_iext_remove(ip, idx, 2, state);
855 xfs_iext_remove(ifp, idx, 2);
856 ip->i_df.if_lastex = idx - 1; 775 ip->i_df.if_lastex = idx - 1;
857 ip->i_d.di_nextents--; 776 ip->i_d.di_nextents--;
858 if (cur == NULL) 777 if (cur == NULL)
@@ -885,20 +804,18 @@ xfs_bmap_add_extent_delay_real(
885 RIGHT.br_blockcount; 804 RIGHT.br_blockcount;
886 break; 805 break;
887 806
888 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): 807 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
889 /* 808 /*
890 * Filling in all of a previously delayed allocation extent. 809 * Filling in all of a previously delayed allocation extent.
891 * The left neighbor is contiguous, the right is not. 810 * The left neighbor is contiguous, the right is not.
892 */ 811 */
893 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, 812 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
894 XFS_DATA_FORK);
895 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 813 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
896 LEFT.br_blockcount + PREV.br_blockcount); 814 LEFT.br_blockcount + PREV.br_blockcount);
897 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, 815 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
898 XFS_DATA_FORK); 816
899 ip->i_df.if_lastex = idx - 1; 817 ip->i_df.if_lastex = idx - 1;
900 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); 818 xfs_iext_remove(ip, idx, 1, state);
901 xfs_iext_remove(ifp, idx, 1);
902 if (cur == NULL) 819 if (cur == NULL)
903 rval = XFS_ILOG_DEXT; 820 rval = XFS_ILOG_DEXT;
904 else { 821 else {
@@ -921,19 +838,19 @@ xfs_bmap_add_extent_delay_real(
921 PREV.br_blockcount; 838 PREV.br_blockcount;
922 break; 839 break;
923 840
924 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): 841 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
925 /* 842 /*
926 * Filling in all of a previously delayed allocation extent. 843 * Filling in all of a previously delayed allocation extent.
927 * The right neighbor is contiguous, the left is not. 844 * The right neighbor is contiguous, the left is not.
928 */ 845 */
929 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); 846 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
930 xfs_bmbt_set_startblock(ep, new->br_startblock); 847 xfs_bmbt_set_startblock(ep, new->br_startblock);
931 xfs_bmbt_set_blockcount(ep, 848 xfs_bmbt_set_blockcount(ep,
932 PREV.br_blockcount + RIGHT.br_blockcount); 849 PREV.br_blockcount + RIGHT.br_blockcount);
933 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK); 850 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
851
934 ip->i_df.if_lastex = idx; 852 ip->i_df.if_lastex = idx;
935 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); 853 xfs_iext_remove(ip, idx + 1, 1, state);
936 xfs_iext_remove(ifp, idx + 1, 1);
937 if (cur == NULL) 854 if (cur == NULL)
938 rval = XFS_ILOG_DEXT; 855 rval = XFS_ILOG_DEXT;
939 else { 856 else {
@@ -956,15 +873,16 @@ xfs_bmap_add_extent_delay_real(
956 RIGHT.br_blockcount; 873 RIGHT.br_blockcount;
957 break; 874 break;
958 875
959 case MASK2(LEFT_FILLING, RIGHT_FILLING): 876 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
960 /* 877 /*
961 * Filling in all of a previously delayed allocation extent. 878 * Filling in all of a previously delayed allocation extent.
962 * Neither the left nor right neighbors are contiguous with 879 * Neither the left nor right neighbors are contiguous with
963 * the new one. 880 * the new one.
964 */ 881 */
965 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); 882 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
966 xfs_bmbt_set_startblock(ep, new->br_startblock); 883 xfs_bmbt_set_startblock(ep, new->br_startblock);
967 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK); 884 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
885
968 ip->i_df.if_lastex = idx; 886 ip->i_df.if_lastex = idx;
969 ip->i_d.di_nextents++; 887 ip->i_d.di_nextents++;
970 if (cur == NULL) 888 if (cur == NULL)
@@ -987,19 +905,20 @@ xfs_bmap_add_extent_delay_real(
987 temp2 = new->br_blockcount; 905 temp2 = new->br_blockcount;
988 break; 906 break;
989 907
990 case MASK2(LEFT_FILLING, LEFT_CONTIG): 908 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
991 /* 909 /*
992 * Filling in the first part of a previous delayed allocation. 910 * Filling in the first part of a previous delayed allocation.
993 * The left neighbor is contiguous. 911 * The left neighbor is contiguous.
994 */ 912 */
995 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); 913 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
996 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 914 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
997 LEFT.br_blockcount + new->br_blockcount); 915 LEFT.br_blockcount + new->br_blockcount);
998 xfs_bmbt_set_startoff(ep, 916 xfs_bmbt_set_startoff(ep,
999 PREV.br_startoff + new->br_blockcount); 917 PREV.br_startoff + new->br_blockcount);
1000 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK); 918 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
919
1001 temp = PREV.br_blockcount - new->br_blockcount; 920 temp = PREV.br_blockcount - new->br_blockcount;
1002 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); 921 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1003 xfs_bmbt_set_blockcount(ep, temp); 922 xfs_bmbt_set_blockcount(ep, temp);
1004 ip->i_df.if_lastex = idx - 1; 923 ip->i_df.if_lastex = idx - 1;
1005 if (cur == NULL) 924 if (cur == NULL)
@@ -1021,7 +940,7 @@ xfs_bmap_add_extent_delay_real(
1021 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 940 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1022 startblockval(PREV.br_startblock)); 941 startblockval(PREV.br_startblock));
1023 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 942 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1024 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK); 943 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1025 *dnew = temp; 944 *dnew = temp;
1026 /* DELTA: The boundary between two in-core extents moved. */ 945 /* DELTA: The boundary between two in-core extents moved. */
1027 temp = LEFT.br_startoff; 946 temp = LEFT.br_startoff;
@@ -1029,18 +948,16 @@ xfs_bmap_add_extent_delay_real(
1029 PREV.br_blockcount; 948 PREV.br_blockcount;
1030 break; 949 break;
1031 950
1032 case MASK(LEFT_FILLING): 951 case BMAP_LEFT_FILLING:
1033 /* 952 /*
1034 * Filling in the first part of a previous delayed allocation. 953 * Filling in the first part of a previous delayed allocation.
1035 * The left neighbor is not contiguous. 954 * The left neighbor is not contiguous.
1036 */ 955 */
1037 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); 956 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1038 xfs_bmbt_set_startoff(ep, new_endoff); 957 xfs_bmbt_set_startoff(ep, new_endoff);
1039 temp = PREV.br_blockcount - new->br_blockcount; 958 temp = PREV.br_blockcount - new->br_blockcount;
1040 xfs_bmbt_set_blockcount(ep, temp); 959 xfs_bmbt_set_blockcount(ep, temp);
1041 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, 960 xfs_iext_insert(ip, idx, 1, new, state);
1042 XFS_DATA_FORK);
1043 xfs_iext_insert(ifp, idx, 1, new);
1044 ip->i_df.if_lastex = idx; 961 ip->i_df.if_lastex = idx;
1045 ip->i_d.di_nextents++; 962 ip->i_d.di_nextents++;
1046 if (cur == NULL) 963 if (cur == NULL)
@@ -1071,27 +988,27 @@ xfs_bmap_add_extent_delay_real(
1071 (cur ? cur->bc_private.b.allocated : 0)); 988 (cur ? cur->bc_private.b.allocated : 0));
1072 ep = xfs_iext_get_ext(ifp, idx + 1); 989 ep = xfs_iext_get_ext(ifp, idx + 1);
1073 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 990 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1074 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK); 991 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
1075 *dnew = temp; 992 *dnew = temp;
1076 /* DELTA: One in-core extent is split in two. */ 993 /* DELTA: One in-core extent is split in two. */
1077 temp = PREV.br_startoff; 994 temp = PREV.br_startoff;
1078 temp2 = PREV.br_blockcount; 995 temp2 = PREV.br_blockcount;
1079 break; 996 break;
1080 997
1081 case MASK2(RIGHT_FILLING, RIGHT_CONTIG): 998 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1082 /* 999 /*
1083 * Filling in the last part of a previous delayed allocation. 1000 * Filling in the last part of a previous delayed allocation.
1084 * The right neighbor is contiguous with the new allocation. 1001 * The right neighbor is contiguous with the new allocation.
1085 */ 1002 */
1086 temp = PREV.br_blockcount - new->br_blockcount; 1003 temp = PREV.br_blockcount - new->br_blockcount;
1087 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); 1004 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1088 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); 1005 trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
1089 xfs_bmbt_set_blockcount(ep, temp); 1006 xfs_bmbt_set_blockcount(ep, temp);
1090 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1007 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1091 new->br_startoff, new->br_startblock, 1008 new->br_startoff, new->br_startblock,
1092 new->br_blockcount + RIGHT.br_blockcount, 1009 new->br_blockcount + RIGHT.br_blockcount,
1093 RIGHT.br_state); 1010 RIGHT.br_state);
1094 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK); 1011 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
1095 ip->i_df.if_lastex = idx + 1; 1012 ip->i_df.if_lastex = idx + 1;
1096 if (cur == NULL) 1013 if (cur == NULL)
1097 rval = XFS_ILOG_DEXT; 1014 rval = XFS_ILOG_DEXT;
@@ -1112,7 +1029,7 @@ xfs_bmap_add_extent_delay_real(
1112 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 1029 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1113 startblockval(PREV.br_startblock)); 1030 startblockval(PREV.br_startblock));
1114 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1031 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1115 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK); 1032 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1116 *dnew = temp; 1033 *dnew = temp;
1117 /* DELTA: The boundary between two in-core extents moved. */ 1034 /* DELTA: The boundary between two in-core extents moved. */
1118 temp = PREV.br_startoff; 1035 temp = PREV.br_startoff;
@@ -1120,17 +1037,15 @@ xfs_bmap_add_extent_delay_real(
1120 RIGHT.br_blockcount; 1037 RIGHT.br_blockcount;
1121 break; 1038 break;
1122 1039
1123 case MASK(RIGHT_FILLING): 1040 case BMAP_RIGHT_FILLING:
1124 /* 1041 /*
1125 * Filling in the last part of a previous delayed allocation. 1042 * Filling in the last part of a previous delayed allocation.
1126 * The right neighbor is not contiguous. 1043 * The right neighbor is not contiguous.
1127 */ 1044 */
1128 temp = PREV.br_blockcount - new->br_blockcount; 1045 temp = PREV.br_blockcount - new->br_blockcount;
1129 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1046 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1130 xfs_bmbt_set_blockcount(ep, temp); 1047 xfs_bmbt_set_blockcount(ep, temp);
1131 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, 1048 xfs_iext_insert(ip, idx + 1, 1, new, state);
1132 XFS_DATA_FORK);
1133 xfs_iext_insert(ifp, idx + 1, 1, new);
1134 ip->i_df.if_lastex = idx + 1; 1049 ip->i_df.if_lastex = idx + 1;
1135 ip->i_d.di_nextents++; 1050 ip->i_d.di_nextents++;
1136 if (cur == NULL) 1051 if (cur == NULL)
@@ -1161,7 +1076,7 @@ xfs_bmap_add_extent_delay_real(
1161 (cur ? cur->bc_private.b.allocated : 0)); 1076 (cur ? cur->bc_private.b.allocated : 0));
1162 ep = xfs_iext_get_ext(ifp, idx); 1077 ep = xfs_iext_get_ext(ifp, idx);
1163 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1078 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1164 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1079 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1165 *dnew = temp; 1080 *dnew = temp;
1166 /* DELTA: One in-core extent is split in two. */ 1081 /* DELTA: One in-core extent is split in two. */
1167 temp = PREV.br_startoff; 1082 temp = PREV.br_startoff;
@@ -1175,7 +1090,7 @@ xfs_bmap_add_extent_delay_real(
1175 * This case is avoided almost all the time. 1090 * This case is avoided almost all the time.
1176 */ 1091 */
1177 temp = new->br_startoff - PREV.br_startoff; 1092 temp = new->br_startoff - PREV.br_startoff;
1178 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); 1093 trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
1179 xfs_bmbt_set_blockcount(ep, temp); 1094 xfs_bmbt_set_blockcount(ep, temp);
1180 r[0] = *new; 1095 r[0] = *new;
1181 r[1].br_state = PREV.br_state; 1096 r[1].br_state = PREV.br_state;
@@ -1183,9 +1098,7 @@ xfs_bmap_add_extent_delay_real(
1183 r[1].br_startoff = new_endoff; 1098 r[1].br_startoff = new_endoff;
1184 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1099 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
1185 r[1].br_blockcount = temp2; 1100 r[1].br_blockcount = temp2;
1186 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], 1101 xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
1187 XFS_DATA_FORK);
1188 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1189 ip->i_df.if_lastex = idx + 1; 1102 ip->i_df.if_lastex = idx + 1;
1190 ip->i_d.di_nextents++; 1103 ip->i_d.di_nextents++;
1191 if (cur == NULL) 1104 if (cur == NULL)
@@ -1242,24 +1155,24 @@ xfs_bmap_add_extent_delay_real(
1242 } 1155 }
1243 ep = xfs_iext_get_ext(ifp, idx); 1156 ep = xfs_iext_get_ext(ifp, idx);
1244 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 1157 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
1245 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); 1158 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1246 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); 1159 trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_);
1247 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), 1160 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
1248 nullstartblock((int)temp2)); 1161 nullstartblock((int)temp2));
1249 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK); 1162 trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
1250 *dnew = temp + temp2; 1163 *dnew = temp + temp2;
1251 /* DELTA: One in-core extent is split in three. */ 1164 /* DELTA: One in-core extent is split in three. */
1252 temp = PREV.br_startoff; 1165 temp = PREV.br_startoff;
1253 temp2 = PREV.br_blockcount; 1166 temp2 = PREV.br_blockcount;
1254 break; 1167 break;
1255 1168
1256 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1169 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1257 case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1170 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1258 case MASK2(LEFT_FILLING, RIGHT_CONTIG): 1171 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1259 case MASK2(RIGHT_FILLING, LEFT_CONTIG): 1172 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1260 case MASK2(LEFT_CONTIG, RIGHT_CONTIG): 1173 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1261 case MASK(LEFT_CONTIG): 1174 case BMAP_LEFT_CONTIG:
1262 case MASK(RIGHT_CONTIG): 1175 case BMAP_RIGHT_CONTIG:
1263 /* 1176 /*
1264 * These cases are all impossible. 1177 * These cases are all impossible.
1265 */ 1178 */
@@ -1279,14 +1192,6 @@ done:
1279#undef LEFT 1192#undef LEFT
1280#undef RIGHT 1193#undef RIGHT
1281#undef PREV 1194#undef PREV
1282#undef MASK
1283#undef MASK2
1284#undef MASK3
1285#undef MASK4
1286#undef STATE_SET
1287#undef STATE_TEST
1288#undef STATE_SET_TEST
1289#undef SWITCH_STATE
1290} 1195}
1291 1196
1292/* 1197/*
@@ -1316,27 +1221,10 @@ xfs_bmap_add_extent_unwritten_real(
1316 int state = 0;/* state bits, accessed thru macros */ 1221 int state = 0;/* state bits, accessed thru macros */
1317 xfs_filblks_t temp=0; 1222 xfs_filblks_t temp=0;
1318 xfs_filblks_t temp2=0; 1223 xfs_filblks_t temp2=0;
1319 enum { /* bit number definitions for state */
1320 LEFT_CONTIG, RIGHT_CONTIG,
1321 LEFT_FILLING, RIGHT_FILLING,
1322 LEFT_DELAY, RIGHT_DELAY,
1323 LEFT_VALID, RIGHT_VALID
1324 };
1325 1224
1326#define LEFT r[0] 1225#define LEFT r[0]
1327#define RIGHT r[1] 1226#define RIGHT r[1]
1328#define PREV r[2] 1227#define PREV r[2]
1329#define MASK(b) (1 << (b))
1330#define MASK2(a,b) (MASK(a) | MASK(b))
1331#define MASK3(a,b,c) (MASK2(a,b) | MASK(c))
1332#define MASK4(a,b,c,d) (MASK3(a,b,c) | MASK(d))
1333#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
1334#define STATE_TEST(b) (state & MASK(b))
1335#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
1336 ((state &= ~MASK(b)), 0))
1337#define SWITCH_STATE \
1338 (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
1339
1340 /* 1228 /*
1341 * Set up a bunch of variables to make the tests simpler. 1229 * Set up a bunch of variables to make the tests simpler.
1342 */ 1230 */
@@ -1352,68 +1240,78 @@ xfs_bmap_add_extent_unwritten_real(
1352 new_endoff = new->br_startoff + new->br_blockcount; 1240 new_endoff = new->br_startoff + new->br_blockcount;
1353 ASSERT(PREV.br_startoff <= new->br_startoff); 1241 ASSERT(PREV.br_startoff <= new->br_startoff);
1354 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 1242 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1243
1355 /* 1244 /*
1356 * Set flags determining what part of the previous oldext allocation 1245 * Set flags determining what part of the previous oldext allocation
1357 * extent is being replaced by a newext allocation. 1246 * extent is being replaced by a newext allocation.
1358 */ 1247 */
1359 STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff); 1248 if (PREV.br_startoff == new->br_startoff)
1360 STATE_SET(RIGHT_FILLING, 1249 state |= BMAP_LEFT_FILLING;
1361 PREV.br_startoff + PREV.br_blockcount == new_endoff); 1250 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1251 state |= BMAP_RIGHT_FILLING;
1252
1362 /* 1253 /*
1363 * Check and set flags if this segment has a left neighbor. 1254 * Check and set flags if this segment has a left neighbor.
1364 * Don't set contiguous if the combined extent would be too large. 1255 * Don't set contiguous if the combined extent would be too large.
1365 */ 1256 */
1366 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 1257 if (idx > 0) {
1258 state |= BMAP_LEFT_VALID;
1367 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); 1259 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
1368 STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock)); 1260
1261 if (isnullstartblock(LEFT.br_startblock))
1262 state |= BMAP_LEFT_DELAY;
1369 } 1263 }
1370 STATE_SET(LEFT_CONTIG, 1264
1371 STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && 1265 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1372 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 1266 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1373 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 1267 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1374 LEFT.br_state == newext && 1268 LEFT.br_state == newext &&
1375 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN); 1269 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1270 state |= BMAP_LEFT_CONTIG;
1271
1376 /* 1272 /*
1377 * Check and set flags if this segment has a right neighbor. 1273 * Check and set flags if this segment has a right neighbor.
1378 * Don't set contiguous if the combined extent would be too large. 1274 * Don't set contiguous if the combined extent would be too large.
1379 * Also check for all-three-contiguous being too large. 1275 * Also check for all-three-contiguous being too large.
1380 */ 1276 */
1381 if (STATE_SET_TEST(RIGHT_VALID, 1277 if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1382 idx < 1278 state |= BMAP_RIGHT_VALID;
1383 ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
1384 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); 1279 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
1385 STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock)); 1280 if (isnullstartblock(RIGHT.br_startblock))
1281 state |= BMAP_RIGHT_DELAY;
1386 } 1282 }
1387 STATE_SET(RIGHT_CONTIG, 1283
1388 STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && 1284 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1389 new_endoff == RIGHT.br_startoff && 1285 new_endoff == RIGHT.br_startoff &&
1390 new->br_startblock + new->br_blockcount == 1286 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1391 RIGHT.br_startblock && 1287 newext == RIGHT.br_state &&
1392 newext == RIGHT.br_state && 1288 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1393 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 1289 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1394 ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) != 1290 BMAP_RIGHT_FILLING)) !=
1395 MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) || 1291 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1396 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 1292 BMAP_RIGHT_FILLING) ||
1397 <= MAXEXTLEN)); 1293 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1294 <= MAXEXTLEN))
1295 state |= BMAP_RIGHT_CONTIG;
1296
1398 /* 1297 /*
1399 * Switch out based on the FILLING and CONTIG state bits. 1298 * Switch out based on the FILLING and CONTIG state bits.
1400 */ 1299 */
1401 switch (SWITCH_STATE) { 1300 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1402 1301 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1403 case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1302 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1303 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1404 /* 1304 /*
1405 * Setting all of a previous oldext extent to newext. 1305 * Setting all of a previous oldext extent to newext.
1406 * The left and right neighbors are both contiguous with new. 1306 * The left and right neighbors are both contiguous with new.
1407 */ 1307 */
1408 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1, 1308 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1409 XFS_DATA_FORK);
1410 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1309 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1411 LEFT.br_blockcount + PREV.br_blockcount + 1310 LEFT.br_blockcount + PREV.br_blockcount +
1412 RIGHT.br_blockcount); 1311 RIGHT.br_blockcount);
1413 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1, 1312 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1414 XFS_DATA_FORK); 1313
1415 XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK); 1314 xfs_iext_remove(ip, idx, 2, state);
1416 xfs_iext_remove(ifp, idx, 2);
1417 ip->i_df.if_lastex = idx - 1; 1315 ip->i_df.if_lastex = idx - 1;
1418 ip->i_d.di_nextents -= 2; 1316 ip->i_d.di_nextents -= 2;
1419 if (cur == NULL) 1317 if (cur == NULL)
@@ -1450,20 +1348,18 @@ xfs_bmap_add_extent_unwritten_real(
1450 RIGHT.br_blockcount; 1348 RIGHT.br_blockcount;
1451 break; 1349 break;
1452 1350
1453 case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): 1351 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1454 /* 1352 /*
1455 * Setting all of a previous oldext extent to newext. 1353 * Setting all of a previous oldext extent to newext.
1456 * The left neighbor is contiguous, the right is not. 1354 * The left neighbor is contiguous, the right is not.
1457 */ 1355 */
1458 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1, 1356 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1459 XFS_DATA_FORK);
1460 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1357 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1461 LEFT.br_blockcount + PREV.br_blockcount); 1358 LEFT.br_blockcount + PREV.br_blockcount);
1462 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1, 1359 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1463 XFS_DATA_FORK); 1360
1464 ip->i_df.if_lastex = idx - 1; 1361 ip->i_df.if_lastex = idx - 1;
1465 XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK); 1362 xfs_iext_remove(ip, idx, 1, state);
1466 xfs_iext_remove(ifp, idx, 1);
1467 ip->i_d.di_nextents--; 1363 ip->i_d.di_nextents--;
1468 if (cur == NULL) 1364 if (cur == NULL)
1469 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1365 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1492,21 +1388,18 @@ xfs_bmap_add_extent_unwritten_real(
1492 PREV.br_blockcount; 1388 PREV.br_blockcount;
1493 break; 1389 break;
1494 1390
1495 case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): 1391 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1496 /* 1392 /*
1497 * Setting all of a previous oldext extent to newext. 1393 * Setting all of a previous oldext extent to newext.
1498 * The right neighbor is contiguous, the left is not. 1394 * The right neighbor is contiguous, the left is not.
1499 */ 1395 */
1500 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, 1396 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1501 XFS_DATA_FORK);
1502 xfs_bmbt_set_blockcount(ep, 1397 xfs_bmbt_set_blockcount(ep,
1503 PREV.br_blockcount + RIGHT.br_blockcount); 1398 PREV.br_blockcount + RIGHT.br_blockcount);
1504 xfs_bmbt_set_state(ep, newext); 1399 xfs_bmbt_set_state(ep, newext);
1505 XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, 1400 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1506 XFS_DATA_FORK);
1507 ip->i_df.if_lastex = idx; 1401 ip->i_df.if_lastex = idx;
1508 XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK); 1402 xfs_iext_remove(ip, idx + 1, 1, state);
1509 xfs_iext_remove(ifp, idx + 1, 1);
1510 ip->i_d.di_nextents--; 1403 ip->i_d.di_nextents--;
1511 if (cur == NULL) 1404 if (cur == NULL)
1512 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1405 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1535,17 +1428,16 @@ xfs_bmap_add_extent_unwritten_real(
1535 RIGHT.br_blockcount; 1428 RIGHT.br_blockcount;
1536 break; 1429 break;
1537 1430
1538 case MASK2(LEFT_FILLING, RIGHT_FILLING): 1431 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1539 /* 1432 /*
1540 * Setting all of a previous oldext extent to newext. 1433 * Setting all of a previous oldext extent to newext.
1541 * Neither the left nor right neighbors are contiguous with 1434 * Neither the left nor right neighbors are contiguous with
1542 * the new one. 1435 * the new one.
1543 */ 1436 */
1544 XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, 1437 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1545 XFS_DATA_FORK);
1546 xfs_bmbt_set_state(ep, newext); 1438 xfs_bmbt_set_state(ep, newext);
1547 XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, 1439 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1548 XFS_DATA_FORK); 1440
1549 ip->i_df.if_lastex = idx; 1441 ip->i_df.if_lastex = idx;
1550 if (cur == NULL) 1442 if (cur == NULL)
1551 rval = XFS_ILOG_DEXT; 1443 rval = XFS_ILOG_DEXT;
@@ -1566,27 +1458,25 @@ xfs_bmap_add_extent_unwritten_real(
1566 temp2 = new->br_blockcount; 1458 temp2 = new->br_blockcount;
1567 break; 1459 break;
1568 1460
1569 case MASK2(LEFT_FILLING, LEFT_CONTIG): 1461 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1570 /* 1462 /*
1571 * Setting the first part of a previous oldext extent to newext. 1463 * Setting the first part of a previous oldext extent to newext.
1572 * The left neighbor is contiguous. 1464 * The left neighbor is contiguous.
1573 */ 1465 */
1574 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, 1466 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1575 XFS_DATA_FORK);
1576 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1467 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
1577 LEFT.br_blockcount + new->br_blockcount); 1468 LEFT.br_blockcount + new->br_blockcount);
1578 xfs_bmbt_set_startoff(ep, 1469 xfs_bmbt_set_startoff(ep,
1579 PREV.br_startoff + new->br_blockcount); 1470 PREV.br_startoff + new->br_blockcount);
1580 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, 1471 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1581 XFS_DATA_FORK); 1472
1582 XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, 1473 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1583 XFS_DATA_FORK);
1584 xfs_bmbt_set_startblock(ep, 1474 xfs_bmbt_set_startblock(ep,
1585 new->br_startblock + new->br_blockcount); 1475 new->br_startblock + new->br_blockcount);
1586 xfs_bmbt_set_blockcount(ep, 1476 xfs_bmbt_set_blockcount(ep,
1587 PREV.br_blockcount - new->br_blockcount); 1477 PREV.br_blockcount - new->br_blockcount);
1588 XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, 1478 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1589 XFS_DATA_FORK); 1479
1590 ip->i_df.if_lastex = idx - 1; 1480 ip->i_df.if_lastex = idx - 1;
1591 if (cur == NULL) 1481 if (cur == NULL)
1592 rval = XFS_ILOG_DEXT; 1482 rval = XFS_ILOG_DEXT;
@@ -1617,22 +1507,21 @@ xfs_bmap_add_extent_unwritten_real(
1617 PREV.br_blockcount; 1507 PREV.br_blockcount;
1618 break; 1508 break;
1619 1509
1620 case MASK(LEFT_FILLING): 1510 case BMAP_LEFT_FILLING:
1621 /* 1511 /*
1622 * Setting the first part of a previous oldext extent to newext. 1512 * Setting the first part of a previous oldext extent to newext.
1623 * The left neighbor is not contiguous. 1513 * The left neighbor is not contiguous.
1624 */ 1514 */
1625 XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK); 1515 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1626 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); 1516 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
1627 xfs_bmbt_set_startoff(ep, new_endoff); 1517 xfs_bmbt_set_startoff(ep, new_endoff);
1628 xfs_bmbt_set_blockcount(ep, 1518 xfs_bmbt_set_blockcount(ep,
1629 PREV.br_blockcount - new->br_blockcount); 1519 PREV.br_blockcount - new->br_blockcount);
1630 xfs_bmbt_set_startblock(ep, 1520 xfs_bmbt_set_startblock(ep,
1631 new->br_startblock + new->br_blockcount); 1521 new->br_startblock + new->br_blockcount);
1632 XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK); 1522 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1633 XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL, 1523
1634 XFS_DATA_FORK); 1524 xfs_iext_insert(ip, idx, 1, new, state);
1635 xfs_iext_insert(ifp, idx, 1, new);
1636 ip->i_df.if_lastex = idx; 1525 ip->i_df.if_lastex = idx;
1637 ip->i_d.di_nextents++; 1526 ip->i_d.di_nextents++;
1638 if (cur == NULL) 1527 if (cur == NULL)
@@ -1660,24 +1549,21 @@ xfs_bmap_add_extent_unwritten_real(
1660 temp2 = PREV.br_blockcount; 1549 temp2 = PREV.br_blockcount;
1661 break; 1550 break;
1662 1551
1663 case MASK2(RIGHT_FILLING, RIGHT_CONTIG): 1552 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1664 /* 1553 /*
1665 * Setting the last part of a previous oldext extent to newext. 1554 * Setting the last part of a previous oldext extent to newext.
1666 * The right neighbor is contiguous with the new allocation. 1555 * The right neighbor is contiguous with the new allocation.
1667 */ 1556 */
1668 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, 1557 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1669 XFS_DATA_FORK); 1558 trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
1670 XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
1671 XFS_DATA_FORK);
1672 xfs_bmbt_set_blockcount(ep, 1559 xfs_bmbt_set_blockcount(ep,
1673 PREV.br_blockcount - new->br_blockcount); 1560 PREV.br_blockcount - new->br_blockcount);
1674 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, 1561 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1675 XFS_DATA_FORK);
1676 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), 1562 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
1677 new->br_startoff, new->br_startblock, 1563 new->br_startoff, new->br_startblock,
1678 new->br_blockcount + RIGHT.br_blockcount, newext); 1564 new->br_blockcount + RIGHT.br_blockcount, newext);
1679 XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, 1565 trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
1680 XFS_DATA_FORK); 1566
1681 ip->i_df.if_lastex = idx + 1; 1567 ip->i_df.if_lastex = idx + 1;
1682 if (cur == NULL) 1568 if (cur == NULL)
1683 rval = XFS_ILOG_DEXT; 1569 rval = XFS_ILOG_DEXT;
@@ -1707,18 +1593,17 @@ xfs_bmap_add_extent_unwritten_real(
1707 RIGHT.br_blockcount; 1593 RIGHT.br_blockcount;
1708 break; 1594 break;
1709 1595
1710 case MASK(RIGHT_FILLING): 1596 case BMAP_RIGHT_FILLING:
1711 /* 1597 /*
1712 * Setting the last part of a previous oldext extent to newext. 1598 * Setting the last part of a previous oldext extent to newext.
1713 * The right neighbor is not contiguous. 1599 * The right neighbor is not contiguous.
1714 */ 1600 */
1715 XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1601 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1716 xfs_bmbt_set_blockcount(ep, 1602 xfs_bmbt_set_blockcount(ep,
1717 PREV.br_blockcount - new->br_blockcount); 1603 PREV.br_blockcount - new->br_blockcount);
1718 XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK); 1604 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1719 XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL, 1605
1720 XFS_DATA_FORK); 1606 xfs_iext_insert(ip, idx + 1, 1, new, state);
1721 xfs_iext_insert(ifp, idx + 1, 1, new);
1722 ip->i_df.if_lastex = idx + 1; 1607 ip->i_df.if_lastex = idx + 1;
1723 ip->i_d.di_nextents++; 1608 ip->i_d.di_nextents++;
1724 if (cur == NULL) 1609 if (cur == NULL)
@@ -1756,19 +1641,18 @@ xfs_bmap_add_extent_unwritten_real(
1756 * newext. Contiguity is impossible here. 1641 * newext. Contiguity is impossible here.
1757 * One extent becomes three extents. 1642 * One extent becomes three extents.
1758 */ 1643 */
1759 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK); 1644 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1760 xfs_bmbt_set_blockcount(ep, 1645 xfs_bmbt_set_blockcount(ep,
1761 new->br_startoff - PREV.br_startoff); 1646 new->br_startoff - PREV.br_startoff);
1762 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK); 1647 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1648
1763 r[0] = *new; 1649 r[0] = *new;
1764 r[1].br_startoff = new_endoff; 1650 r[1].br_startoff = new_endoff;
1765 r[1].br_blockcount = 1651 r[1].br_blockcount =
1766 PREV.br_startoff + PREV.br_blockcount - new_endoff; 1652 PREV.br_startoff + PREV.br_blockcount - new_endoff;
1767 r[1].br_startblock = new->br_startblock + new->br_blockcount; 1653 r[1].br_startblock = new->br_startblock + new->br_blockcount;
1768 r[1].br_state = oldext; 1654 r[1].br_state = oldext;
1769 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1], 1655 xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
1770 XFS_DATA_FORK);
1771 xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
1772 ip->i_df.if_lastex = idx + 1; 1656 ip->i_df.if_lastex = idx + 1;
1773 ip->i_d.di_nextents += 2; 1657 ip->i_d.di_nextents += 2;
1774 if (cur == NULL) 1658 if (cur == NULL)
@@ -1813,13 +1697,13 @@ xfs_bmap_add_extent_unwritten_real(
1813 temp2 = PREV.br_blockcount; 1697 temp2 = PREV.br_blockcount;
1814 break; 1698 break;
1815 1699
1816 case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1700 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1817 case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): 1701 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1818 case MASK2(LEFT_FILLING, RIGHT_CONTIG): 1702 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1819 case MASK2(RIGHT_FILLING, LEFT_CONTIG): 1703 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1820 case MASK2(LEFT_CONTIG, RIGHT_CONTIG): 1704 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1821 case MASK(LEFT_CONTIG): 1705 case BMAP_LEFT_CONTIG:
1822 case MASK(RIGHT_CONTIG): 1706 case BMAP_RIGHT_CONTIG:
1823 /* 1707 /*
1824 * These cases are all impossible. 1708 * These cases are all impossible.
1825 */ 1709 */
@@ -1839,14 +1723,6 @@ done:
1839#undef LEFT 1723#undef LEFT
1840#undef RIGHT 1724#undef RIGHT
1841#undef PREV 1725#undef PREV
1842#undef MASK
1843#undef MASK2
1844#undef MASK3
1845#undef MASK4
1846#undef STATE_SET
1847#undef STATE_TEST
1848#undef STATE_SET_TEST
1849#undef SWITCH_STATE
1850} 1726}
1851 1727
1852/* 1728/*
@@ -1872,62 +1748,57 @@ xfs_bmap_add_extent_hole_delay(
1872 int state; /* state bits, accessed thru macros */ 1748 int state; /* state bits, accessed thru macros */
1873 xfs_filblks_t temp=0; /* temp for indirect calculations */ 1749 xfs_filblks_t temp=0; /* temp for indirect calculations */
1874 xfs_filblks_t temp2=0; 1750 xfs_filblks_t temp2=0;
1875 enum { /* bit number definitions for state */
1876 LEFT_CONTIG, RIGHT_CONTIG,
1877 LEFT_DELAY, RIGHT_DELAY,
1878 LEFT_VALID, RIGHT_VALID
1879 };
1880
1881#define MASK(b) (1 << (b))
1882#define MASK2(a,b) (MASK(a) | MASK(b))
1883#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
1884#define STATE_TEST(b) (state & MASK(b))
1885#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
1886 ((state &= ~MASK(b)), 0))
1887#define SWITCH_STATE (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
1888 1751
1889 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1752 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1890 ep = xfs_iext_get_ext(ifp, idx); 1753 ep = xfs_iext_get_ext(ifp, idx);
1891 state = 0; 1754 state = 0;
1892 ASSERT(isnullstartblock(new->br_startblock)); 1755 ASSERT(isnullstartblock(new->br_startblock));
1756
1893 /* 1757 /*
1894 * Check and set flags if this segment has a left neighbor 1758 * Check and set flags if this segment has a left neighbor
1895 */ 1759 */
1896 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 1760 if (idx > 0) {
1761 state |= BMAP_LEFT_VALID;
1897 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); 1762 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
1898 STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock)); 1763
1764 if (isnullstartblock(left.br_startblock))
1765 state |= BMAP_LEFT_DELAY;
1899 } 1766 }
1767
1900 /* 1768 /*
1901 * Check and set flags if the current (right) segment exists. 1769 * Check and set flags if the current (right) segment exists.
1902 * If it doesn't exist, we're converting the hole at end-of-file. 1770 * If it doesn't exist, we're converting the hole at end-of-file.
1903 */ 1771 */
1904 if (STATE_SET_TEST(RIGHT_VALID, 1772 if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
1905 idx < 1773 state |= BMAP_RIGHT_VALID;
1906 ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1907 xfs_bmbt_get_all(ep, &right); 1774 xfs_bmbt_get_all(ep, &right);
1908 STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock)); 1775
1776 if (isnullstartblock(right.br_startblock))
1777 state |= BMAP_RIGHT_DELAY;
1909 } 1778 }
1779
1910 /* 1780 /*
1911 * Set contiguity flags on the left and right neighbors. 1781 * Set contiguity flags on the left and right neighbors.
1912 * Don't let extents get too large, even if the pieces are contiguous. 1782 * Don't let extents get too large, even if the pieces are contiguous.
1913 */ 1783 */
1914 STATE_SET(LEFT_CONTIG, 1784 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
1915 STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) && 1785 left.br_startoff + left.br_blockcount == new->br_startoff &&
1916 left.br_startoff + left.br_blockcount == new->br_startoff && 1786 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1917 left.br_blockcount + new->br_blockcount <= MAXEXTLEN); 1787 state |= BMAP_LEFT_CONTIG;
1918 STATE_SET(RIGHT_CONTIG, 1788
1919 STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) && 1789 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
1920 new->br_startoff + new->br_blockcount == right.br_startoff && 1790 new->br_startoff + new->br_blockcount == right.br_startoff &&
1921 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 1791 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
1922 (!STATE_TEST(LEFT_CONTIG) || 1792 (!(state & BMAP_LEFT_CONTIG) ||
1923 (left.br_blockcount + new->br_blockcount + 1793 (left.br_blockcount + new->br_blockcount +
1924 right.br_blockcount <= MAXEXTLEN))); 1794 right.br_blockcount <= MAXEXTLEN)))
1795 state |= BMAP_RIGHT_CONTIG;
1796
1925 /* 1797 /*
1926 * Switch out based on the contiguity flags. 1798 * Switch out based on the contiguity flags.
1927 */ 1799 */
1928 switch (SWITCH_STATE) { 1800 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
1929 1801 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1930 case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
1931 /* 1802 /*
1932 * New allocation is contiguous with delayed allocations 1803 * New allocation is contiguous with delayed allocations
1933 * on the left and on the right. 1804 * on the left and on the right.
@@ -1935,8 +1806,8 @@ xfs_bmap_add_extent_hole_delay(
1935 */ 1806 */
1936 temp = left.br_blockcount + new->br_blockcount + 1807 temp = left.br_blockcount + new->br_blockcount +
1937 right.br_blockcount; 1808 right.br_blockcount;
1938 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, 1809
1939 XFS_DATA_FORK); 1810 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1940 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1811 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1941 oldlen = startblockval(left.br_startblock) + 1812 oldlen = startblockval(left.br_startblock) +
1942 startblockval(new->br_startblock) + 1813 startblockval(new->br_startblock) +
@@ -1944,53 +1815,52 @@ xfs_bmap_add_extent_hole_delay(
1944 newlen = xfs_bmap_worst_indlen(ip, temp); 1815 newlen = xfs_bmap_worst_indlen(ip, temp);
1945 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1816 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1946 nullstartblock((int)newlen)); 1817 nullstartblock((int)newlen));
1947 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, 1818 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1948 XFS_DATA_FORK); 1819
1949 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK); 1820 xfs_iext_remove(ip, idx, 1, state);
1950 xfs_iext_remove(ifp, idx, 1);
1951 ip->i_df.if_lastex = idx - 1; 1821 ip->i_df.if_lastex = idx - 1;
1952 /* DELTA: Two in-core extents were replaced by one. */ 1822 /* DELTA: Two in-core extents were replaced by one. */
1953 temp2 = temp; 1823 temp2 = temp;
1954 temp = left.br_startoff; 1824 temp = left.br_startoff;
1955 break; 1825 break;
1956 1826
1957 case MASK(LEFT_CONTIG): 1827 case BMAP_LEFT_CONTIG:
1958 /* 1828 /*
1959 * New allocation is contiguous with a delayed allocation 1829 * New allocation is contiguous with a delayed allocation
1960 * on the left. 1830 * on the left.
1961 * Merge the new allocation with the left neighbor. 1831 * Merge the new allocation with the left neighbor.
1962 */ 1832 */
1963 temp = left.br_blockcount + new->br_blockcount; 1833 temp = left.br_blockcount + new->br_blockcount;
1964 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, 1834 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
1965 XFS_DATA_FORK);
1966 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); 1835 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
1967 oldlen = startblockval(left.br_startblock) + 1836 oldlen = startblockval(left.br_startblock) +
1968 startblockval(new->br_startblock); 1837 startblockval(new->br_startblock);
1969 newlen = xfs_bmap_worst_indlen(ip, temp); 1838 newlen = xfs_bmap_worst_indlen(ip, temp);
1970 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), 1839 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
1971 nullstartblock((int)newlen)); 1840 nullstartblock((int)newlen));
1972 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, 1841 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
1973 XFS_DATA_FORK); 1842
1974 ip->i_df.if_lastex = idx - 1; 1843 ip->i_df.if_lastex = idx - 1;
1975 /* DELTA: One in-core extent grew into a hole. */ 1844 /* DELTA: One in-core extent grew into a hole. */
1976 temp2 = temp; 1845 temp2 = temp;
1977 temp = left.br_startoff; 1846 temp = left.br_startoff;
1978 break; 1847 break;
1979 1848
1980 case MASK(RIGHT_CONTIG): 1849 case BMAP_RIGHT_CONTIG:
1981 /* 1850 /*
1982 * New allocation is contiguous with a delayed allocation 1851 * New allocation is contiguous with a delayed allocation
1983 * on the right. 1852 * on the right.
1984 * Merge the new allocation with the right neighbor. 1853 * Merge the new allocation with the right neighbor.
1985 */ 1854 */
1986 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK); 1855 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
1987 temp = new->br_blockcount + right.br_blockcount; 1856 temp = new->br_blockcount + right.br_blockcount;
1988 oldlen = startblockval(new->br_startblock) + 1857 oldlen = startblockval(new->br_startblock) +
1989 startblockval(right.br_startblock); 1858 startblockval(right.br_startblock);
1990 newlen = xfs_bmap_worst_indlen(ip, temp); 1859 newlen = xfs_bmap_worst_indlen(ip, temp);
1991 xfs_bmbt_set_allf(ep, new->br_startoff, 1860 xfs_bmbt_set_allf(ep, new->br_startoff,
1992 nullstartblock((int)newlen), temp, right.br_state); 1861 nullstartblock((int)newlen), temp, right.br_state);
1993 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK); 1862 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
1863
1994 ip->i_df.if_lastex = idx; 1864 ip->i_df.if_lastex = idx;
1995 /* DELTA: One in-core extent grew into a hole. */ 1865 /* DELTA: One in-core extent grew into a hole. */
1996 temp2 = temp; 1866 temp2 = temp;
@@ -2004,9 +1874,7 @@ xfs_bmap_add_extent_hole_delay(
2004 * Insert a new entry. 1874 * Insert a new entry.
2005 */ 1875 */
2006 oldlen = newlen = 0; 1876 oldlen = newlen = 0;
2007 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, 1877 xfs_iext_insert(ip, idx, 1, new, state);
2008 XFS_DATA_FORK);
2009 xfs_iext_insert(ifp, idx, 1, new);
2010 ip->i_df.if_lastex = idx; 1878 ip->i_df.if_lastex = idx;
2011 /* DELTA: A new in-core extent was added in a hole. */ 1879 /* DELTA: A new in-core extent was added in a hole. */
2012 temp2 = new->br_blockcount; 1880 temp2 = new->br_blockcount;
@@ -2030,12 +1898,6 @@ xfs_bmap_add_extent_hole_delay(
2030 } 1898 }
2031 *logflagsp = 0; 1899 *logflagsp = 0;
2032 return 0; 1900 return 0;
2033#undef MASK
2034#undef MASK2
2035#undef STATE_SET
2036#undef STATE_TEST
2037#undef STATE_SET_TEST
2038#undef SWITCH_STATE
2039} 1901}
2040 1902
2041/* 1903/*
@@ -2062,83 +1924,75 @@ xfs_bmap_add_extent_hole_real(
2062 int state; /* state bits, accessed thru macros */ 1924 int state; /* state bits, accessed thru macros */
2063 xfs_filblks_t temp=0; 1925 xfs_filblks_t temp=0;
2064 xfs_filblks_t temp2=0; 1926 xfs_filblks_t temp2=0;
2065 enum { /* bit number definitions for state */
2066 LEFT_CONTIG, RIGHT_CONTIG,
2067 LEFT_DELAY, RIGHT_DELAY,
2068 LEFT_VALID, RIGHT_VALID
2069 };
2070
2071#define MASK(b) (1 << (b))
2072#define MASK2(a,b) (MASK(a) | MASK(b))
2073#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
2074#define STATE_TEST(b) (state & MASK(b))
2075#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \
2076 ((state &= ~MASK(b)), 0))
2077#define SWITCH_STATE (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
2078 1927
2079 ifp = XFS_IFORK_PTR(ip, whichfork); 1928 ifp = XFS_IFORK_PTR(ip, whichfork);
2080 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 1929 ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
2081 ep = xfs_iext_get_ext(ifp, idx); 1930 ep = xfs_iext_get_ext(ifp, idx);
2082 state = 0; 1931 state = 0;
1932
1933 if (whichfork == XFS_ATTR_FORK)
1934 state |= BMAP_ATTRFORK;
1935
2083 /* 1936 /*
2084 * Check and set flags if this segment has a left neighbor. 1937 * Check and set flags if this segment has a left neighbor.
2085 */ 1938 */
2086 if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { 1939 if (idx > 0) {
1940 state |= BMAP_LEFT_VALID;
2087 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); 1941 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
2088 STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock)); 1942 if (isnullstartblock(left.br_startblock))
1943 state |= BMAP_LEFT_DELAY;
2089 } 1944 }
1945
2090 /* 1946 /*
2091 * Check and set flags if this segment has a current value. 1947 * Check and set flags if this segment has a current value.
2092 * Not true if we're inserting into the "hole" at eof. 1948 * Not true if we're inserting into the "hole" at eof.
2093 */ 1949 */
2094 if (STATE_SET_TEST(RIGHT_VALID, 1950 if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
2095 idx < 1951 state |= BMAP_RIGHT_VALID;
2096 ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
2097 xfs_bmbt_get_all(ep, &right); 1952 xfs_bmbt_get_all(ep, &right);
2098 STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock)); 1953 if (isnullstartblock(right.br_startblock))
1954 state |= BMAP_RIGHT_DELAY;
2099 } 1955 }
1956
2100 /* 1957 /*
2101 * We're inserting a real allocation between "left" and "right". 1958 * We're inserting a real allocation between "left" and "right".
2102 * Set the contiguity flags. Don't let extents get too large. 1959 * Set the contiguity flags. Don't let extents get too large.
2103 */ 1960 */
2104 STATE_SET(LEFT_CONTIG, 1961 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2105 STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && 1962 left.br_startoff + left.br_blockcount == new->br_startoff &&
2106 left.br_startoff + left.br_blockcount == new->br_startoff && 1963 left.br_startblock + left.br_blockcount == new->br_startblock &&
2107 left.br_startblock + left.br_blockcount == new->br_startblock && 1964 left.br_state == new->br_state &&
2108 left.br_state == new->br_state && 1965 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2109 left.br_blockcount + new->br_blockcount <= MAXEXTLEN); 1966 state |= BMAP_LEFT_CONTIG;
2110 STATE_SET(RIGHT_CONTIG, 1967
2111 STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && 1968 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2112 new->br_startoff + new->br_blockcount == right.br_startoff && 1969 new->br_startoff + new->br_blockcount == right.br_startoff &&
2113 new->br_startblock + new->br_blockcount == 1970 new->br_startblock + new->br_blockcount == right.br_startblock &&
2114 right.br_startblock && 1971 new->br_state == right.br_state &&
2115 new->br_state == right.br_state && 1972 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2116 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 1973 (!(state & BMAP_LEFT_CONTIG) ||
2117 (!STATE_TEST(LEFT_CONTIG) || 1974 left.br_blockcount + new->br_blockcount +
2118 left.br_blockcount + new->br_blockcount + 1975 right.br_blockcount <= MAXEXTLEN))
2119 right.br_blockcount <= MAXEXTLEN)); 1976 state |= BMAP_RIGHT_CONTIG;
2120 1977
2121 error = 0; 1978 error = 0;
2122 /* 1979 /*
2123 * Select which case we're in here, and implement it. 1980 * Select which case we're in here, and implement it.
2124 */ 1981 */
2125 switch (SWITCH_STATE) { 1982 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2126 1983 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2127 case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
2128 /* 1984 /*
2129 * New allocation is contiguous with real allocations on the 1985 * New allocation is contiguous with real allocations on the
2130 * left and on the right. 1986 * left and on the right.
2131 * Merge all three into a single extent record. 1987 * Merge all three into a single extent record.
2132 */ 1988 */
2133 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, 1989 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
2134 whichfork);
2135 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1990 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2136 left.br_blockcount + new->br_blockcount + 1991 left.br_blockcount + new->br_blockcount +
2137 right.br_blockcount); 1992 right.br_blockcount);
2138 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, 1993 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
2139 whichfork); 1994
2140 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork); 1995 xfs_iext_remove(ip, idx, 1, state);
2141 xfs_iext_remove(ifp, idx, 1);
2142 ifp->if_lastex = idx - 1; 1996 ifp->if_lastex = idx - 1;
2143 XFS_IFORK_NEXT_SET(ip, whichfork, 1997 XFS_IFORK_NEXT_SET(ip, whichfork,
2144 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 1998 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -2173,16 +2027,17 @@ xfs_bmap_add_extent_hole_real(
2173 right.br_blockcount; 2027 right.br_blockcount;
2174 break; 2028 break;
2175 2029
2176 case MASK(LEFT_CONTIG): 2030 case BMAP_LEFT_CONTIG:
2177 /* 2031 /*
2178 * New allocation is contiguous with a real allocation 2032 * New allocation is contiguous with a real allocation
2179 * on the left. 2033 * on the left.
2180 * Merge the new allocation with the left neighbor. 2034 * Merge the new allocation with the left neighbor.
2181 */ 2035 */
2182 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork); 2036 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
2183 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 2037 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2184 left.br_blockcount + new->br_blockcount); 2038 left.br_blockcount + new->br_blockcount);
2185 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork); 2039 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
2040
2186 ifp->if_lastex = idx - 1; 2041 ifp->if_lastex = idx - 1;
2187 if (cur == NULL) { 2042 if (cur == NULL) {
2188 rval = xfs_ilog_fext(whichfork); 2043 rval = xfs_ilog_fext(whichfork);
@@ -2207,17 +2062,18 @@ xfs_bmap_add_extent_hole_real(
2207 new->br_blockcount; 2062 new->br_blockcount;
2208 break; 2063 break;
2209 2064
2210 case MASK(RIGHT_CONTIG): 2065 case BMAP_RIGHT_CONTIG:
2211 /* 2066 /*
2212 * New allocation is contiguous with a real allocation 2067 * New allocation is contiguous with a real allocation
2213 * on the right. 2068 * on the right.
2214 * Merge the new allocation with the right neighbor. 2069 * Merge the new allocation with the right neighbor.
2215 */ 2070 */
2216 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork); 2071 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
2217 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, 2072 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
2218 new->br_blockcount + right.br_blockcount, 2073 new->br_blockcount + right.br_blockcount,
2219 right.br_state); 2074 right.br_state);
2220 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork); 2075 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
2076
2221 ifp->if_lastex = idx; 2077 ifp->if_lastex = idx;
2222 if (cur == NULL) { 2078 if (cur == NULL) {
2223 rval = xfs_ilog_fext(whichfork); 2079 rval = xfs_ilog_fext(whichfork);
@@ -2248,8 +2104,7 @@ xfs_bmap_add_extent_hole_real(
2248 * real allocation. 2104 * real allocation.
2249 * Insert a new entry. 2105 * Insert a new entry.
2250 */ 2106 */
2251 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork); 2107 xfs_iext_insert(ip, idx, 1, new, state);
2252 xfs_iext_insert(ifp, idx, 1, new);
2253 ifp->if_lastex = idx; 2108 ifp->if_lastex = idx;
2254 XFS_IFORK_NEXT_SET(ip, whichfork, 2109 XFS_IFORK_NEXT_SET(ip, whichfork,
2255 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2110 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
@@ -2283,12 +2138,6 @@ xfs_bmap_add_extent_hole_real(
2283done: 2138done:
2284 *logflagsp = rval; 2139 *logflagsp = rval;
2285 return error; 2140 return error;
2286#undef MASK
2287#undef MASK2
2288#undef STATE_SET
2289#undef STATE_TEST
2290#undef STATE_SET_TEST
2291#undef SWITCH_STATE
2292} 2141}
2293 2142
2294/* 2143/*
@@ -3115,8 +2964,13 @@ xfs_bmap_del_extent(
3115 uint qfield; /* quota field to update */ 2964 uint qfield; /* quota field to update */
3116 xfs_filblks_t temp; /* for indirect length calculations */ 2965 xfs_filblks_t temp; /* for indirect length calculations */
3117 xfs_filblks_t temp2; /* for indirect length calculations */ 2966 xfs_filblks_t temp2; /* for indirect length calculations */
2967 int state = 0;
3118 2968
3119 XFS_STATS_INC(xs_del_exlist); 2969 XFS_STATS_INC(xs_del_exlist);
2970
2971 if (whichfork == XFS_ATTR_FORK)
2972 state |= BMAP_ATTRFORK;
2973
3120 mp = ip->i_mount; 2974 mp = ip->i_mount;
3121 ifp = XFS_IFORK_PTR(ip, whichfork); 2975 ifp = XFS_IFORK_PTR(ip, whichfork);
3122 ASSERT((idx >= 0) && (idx < ifp->if_bytes / 2976 ASSERT((idx >= 0) && (idx < ifp->if_bytes /
@@ -3196,8 +3050,8 @@ xfs_bmap_del_extent(
3196 /* 3050 /*
3197 * Matches the whole extent. Delete the entry. 3051 * Matches the whole extent. Delete the entry.
3198 */ 3052 */
3199 XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork); 3053 xfs_iext_remove(ip, idx, 1,
3200 xfs_iext_remove(ifp, idx, 1); 3054 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
3201 ifp->if_lastex = idx; 3055 ifp->if_lastex = idx;
3202 if (delay) 3056 if (delay)
3203 break; 3057 break;
@@ -3217,7 +3071,7 @@ xfs_bmap_del_extent(
3217 /* 3071 /*
3218 * Deleting the first part of the extent. 3072 * Deleting the first part of the extent.
3219 */ 3073 */
3220 XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork); 3074 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3221 xfs_bmbt_set_startoff(ep, del_endoff); 3075 xfs_bmbt_set_startoff(ep, del_endoff);
3222 temp = got.br_blockcount - del->br_blockcount; 3076 temp = got.br_blockcount - del->br_blockcount;
3223 xfs_bmbt_set_blockcount(ep, temp); 3077 xfs_bmbt_set_blockcount(ep, temp);
@@ -3226,13 +3080,12 @@ xfs_bmap_del_extent(
3226 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3080 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3227 da_old); 3081 da_old);
3228 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 3082 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
3229 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, 3083 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3230 whichfork);
3231 da_new = temp; 3084 da_new = temp;
3232 break; 3085 break;
3233 } 3086 }
3234 xfs_bmbt_set_startblock(ep, del_endblock); 3087 xfs_bmbt_set_startblock(ep, del_endblock);
3235 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork); 3088 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3236 if (!cur) { 3089 if (!cur) {
3237 flags |= xfs_ilog_fext(whichfork); 3090 flags |= xfs_ilog_fext(whichfork);
3238 break; 3091 break;
@@ -3248,19 +3101,18 @@ xfs_bmap_del_extent(
3248 * Deleting the last part of the extent. 3101 * Deleting the last part of the extent.
3249 */ 3102 */
3250 temp = got.br_blockcount - del->br_blockcount; 3103 temp = got.br_blockcount - del->br_blockcount;
3251 XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork); 3104 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3252 xfs_bmbt_set_blockcount(ep, temp); 3105 xfs_bmbt_set_blockcount(ep, temp);
3253 ifp->if_lastex = idx; 3106 ifp->if_lastex = idx;
3254 if (delay) { 3107 if (delay) {
3255 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3108 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3256 da_old); 3109 da_old);
3257 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 3110 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
3258 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, 3111 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3259 whichfork);
3260 da_new = temp; 3112 da_new = temp;
3261 break; 3113 break;
3262 } 3114 }
3263 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork); 3115 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3264 if (!cur) { 3116 if (!cur) {
3265 flags |= xfs_ilog_fext(whichfork); 3117 flags |= xfs_ilog_fext(whichfork);
3266 break; 3118 break;
@@ -3277,7 +3129,7 @@ xfs_bmap_del_extent(
3277 * Deleting the middle of the extent. 3129 * Deleting the middle of the extent.
3278 */ 3130 */
3279 temp = del->br_startoff - got.br_startoff; 3131 temp = del->br_startoff - got.br_startoff;
3280 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork); 3132 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3281 xfs_bmbt_set_blockcount(ep, temp); 3133 xfs_bmbt_set_blockcount(ep, temp);
3282 new.br_startoff = del_endoff; 3134 new.br_startoff = del_endoff;
3283 temp2 = got_endoff - del_endoff; 3135 temp2 = got_endoff - del_endoff;
@@ -3364,10 +3216,8 @@ xfs_bmap_del_extent(
3364 } 3216 }
3365 } 3217 }
3366 } 3218 }
3367 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork); 3219 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3368 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL, 3220 xfs_iext_insert(ip, idx + 1, 1, &new, state);
3369 whichfork);
3370 xfs_iext_insert(ifp, idx + 1, 1, &new);
3371 ifp->if_lastex = idx + 1; 3221 ifp->if_lastex = idx + 1;
3372 break; 3222 break;
3373 } 3223 }
@@ -3687,7 +3537,9 @@ xfs_bmap_local_to_extents(
3687 xfs_iext_add(ifp, 0, 1); 3537 xfs_iext_add(ifp, 0, 1);
3688 ep = xfs_iext_get_ext(ifp, 0); 3538 ep = xfs_iext_get_ext(ifp, 0);
3689 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); 3539 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3690 XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); 3540 trace_xfs_bmap_post_update(ip, 0,
3541 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
3542 _THIS_IP_);
3691 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 3543 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3692 ip->i_d.di_nblocks = 1; 3544 ip->i_d.di_nblocks = 1;
3693 xfs_trans_mod_dquot_byino(tp, ip, 3545 xfs_trans_mod_dquot_byino(tp, ip,
@@ -3800,158 +3652,6 @@ xfs_bmap_search_extents(
3800 return ep; 3652 return ep;
3801} 3653}
3802 3654
3803
3804#ifdef XFS_BMAP_TRACE
3805ktrace_t *xfs_bmap_trace_buf;
3806
3807/*
3808 * Add a bmap trace buffer entry. Base routine for the others.
3809 */
3810STATIC void
3811xfs_bmap_trace_addentry(
3812 int opcode, /* operation */
3813 const char *fname, /* function name */
3814 char *desc, /* operation description */
3815 xfs_inode_t *ip, /* incore inode pointer */
3816 xfs_extnum_t idx, /* index of entry(ies) */
3817 xfs_extnum_t cnt, /* count of entries, 1 or 2 */
3818 xfs_bmbt_rec_host_t *r1, /* first record */
3819 xfs_bmbt_rec_host_t *r2, /* second record or null */
3820 int whichfork) /* data or attr fork */
3821{
3822 xfs_bmbt_rec_host_t tr2;
3823
3824 ASSERT(cnt == 1 || cnt == 2);
3825 ASSERT(r1 != NULL);
3826 if (cnt == 1) {
3827 ASSERT(r2 == NULL);
3828 r2 = &tr2;
3829 memset(&tr2, 0, sizeof(tr2));
3830 } else
3831 ASSERT(r2 != NULL);
3832 ktrace_enter(xfs_bmap_trace_buf,
3833 (void *)(__psint_t)(opcode | (whichfork << 16)),
3834 (void *)fname, (void *)desc, (void *)ip,
3835 (void *)(__psint_t)idx,
3836 (void *)(__psint_t)cnt,
3837 (void *)(__psunsigned_t)(ip->i_ino >> 32),
3838 (void *)(__psunsigned_t)(unsigned)ip->i_ino,
3839 (void *)(__psunsigned_t)(r1->l0 >> 32),
3840 (void *)(__psunsigned_t)(unsigned)(r1->l0),
3841 (void *)(__psunsigned_t)(r1->l1 >> 32),
3842 (void *)(__psunsigned_t)(unsigned)(r1->l1),
3843 (void *)(__psunsigned_t)(r2->l0 >> 32),
3844 (void *)(__psunsigned_t)(unsigned)(r2->l0),
3845 (void *)(__psunsigned_t)(r2->l1 >> 32),
3846 (void *)(__psunsigned_t)(unsigned)(r2->l1)
3847 );
3848 ASSERT(ip->i_xtrace);
3849 ktrace_enter(ip->i_xtrace,
3850 (void *)(__psint_t)(opcode | (whichfork << 16)),
3851 (void *)fname, (void *)desc, (void *)ip,
3852 (void *)(__psint_t)idx,
3853 (void *)(__psint_t)cnt,
3854 (void *)(__psunsigned_t)(ip->i_ino >> 32),
3855 (void *)(__psunsigned_t)(unsigned)ip->i_ino,
3856 (void *)(__psunsigned_t)(r1->l0 >> 32),
3857 (void *)(__psunsigned_t)(unsigned)(r1->l0),
3858 (void *)(__psunsigned_t)(r1->l1 >> 32),
3859 (void *)(__psunsigned_t)(unsigned)(r1->l1),
3860 (void *)(__psunsigned_t)(r2->l0 >> 32),
3861 (void *)(__psunsigned_t)(unsigned)(r2->l0),
3862 (void *)(__psunsigned_t)(r2->l1 >> 32),
3863 (void *)(__psunsigned_t)(unsigned)(r2->l1)
3864 );
3865}
3866
3867/*
3868 * Add bmap trace entry prior to a call to xfs_iext_remove.
3869 */
3870STATIC void
3871xfs_bmap_trace_delete(
3872 const char *fname, /* function name */
3873 char *desc, /* operation description */
3874 xfs_inode_t *ip, /* incore inode pointer */
3875 xfs_extnum_t idx, /* index of entry(entries) deleted */
3876 xfs_extnum_t cnt, /* count of entries deleted, 1 or 2 */
3877 int whichfork) /* data or attr fork */
3878{
3879 xfs_ifork_t *ifp; /* inode fork pointer */
3880
3881 ifp = XFS_IFORK_PTR(ip, whichfork);
3882 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_DELETE, fname, desc, ip, idx,
3883 cnt, xfs_iext_get_ext(ifp, idx),
3884 cnt == 2 ? xfs_iext_get_ext(ifp, idx + 1) : NULL,
3885 whichfork);
3886}
3887
3888/*
3889 * Add bmap trace entry prior to a call to xfs_iext_insert, or
3890 * reading in the extents list from the disk (in the btree).
3891 */
3892STATIC void
3893xfs_bmap_trace_insert(
3894 const char *fname, /* function name */
3895 char *desc, /* operation description */
3896 xfs_inode_t *ip, /* incore inode pointer */
3897 xfs_extnum_t idx, /* index of entry(entries) inserted */
3898 xfs_extnum_t cnt, /* count of entries inserted, 1 or 2 */
3899 xfs_bmbt_irec_t *r1, /* inserted record 1 */
3900 xfs_bmbt_irec_t *r2, /* inserted record 2 or null */
3901 int whichfork) /* data or attr fork */
3902{
3903 xfs_bmbt_rec_host_t tr1; /* compressed record 1 */
3904 xfs_bmbt_rec_host_t tr2; /* compressed record 2 if needed */
3905
3906 xfs_bmbt_set_all(&tr1, r1);
3907 if (cnt == 2) {
3908 ASSERT(r2 != NULL);
3909 xfs_bmbt_set_all(&tr2, r2);
3910 } else {
3911 ASSERT(cnt == 1);
3912 ASSERT(r2 == NULL);
3913 }
3914 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_INSERT, fname, desc, ip, idx,
3915 cnt, &tr1, cnt == 2 ? &tr2 : NULL, whichfork);
3916}
3917
3918/*
3919 * Add bmap trace entry after updating an extent record in place.
3920 */
3921STATIC void
3922xfs_bmap_trace_post_update(
3923 const char *fname, /* function name */
3924 char *desc, /* operation description */
3925 xfs_inode_t *ip, /* incore inode pointer */
3926 xfs_extnum_t idx, /* index of entry updated */
3927 int whichfork) /* data or attr fork */
3928{
3929 xfs_ifork_t *ifp; /* inode fork pointer */
3930
3931 ifp = XFS_IFORK_PTR(ip, whichfork);
3932 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_POST_UP, fname, desc, ip, idx,
3933 1, xfs_iext_get_ext(ifp, idx), NULL, whichfork);
3934}
3935
3936/*
3937 * Add bmap trace entry prior to updating an extent record in place.
3938 */
3939STATIC void
3940xfs_bmap_trace_pre_update(
3941 const char *fname, /* function name */
3942 char *desc, /* operation description */
3943 xfs_inode_t *ip, /* incore inode pointer */
3944 xfs_extnum_t idx, /* index of entry to be updated */
3945 int whichfork) /* data or attr fork */
3946{
3947 xfs_ifork_t *ifp; /* inode fork pointer */
3948
3949 ifp = XFS_IFORK_PTR(ip, whichfork);
3950 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_PRE_UP, fname, desc, ip, idx, 1,
3951 xfs_iext_get_ext(ifp, idx), NULL, whichfork);
3952}
3953#endif /* XFS_BMAP_TRACE */
3954
3955/* 3655/*
3956 * Compute the worst-case number of indirect blocks that will be used 3656 * Compute the worst-case number of indirect blocks that will be used
3957 * for ip's delayed extent of length "len". 3657 * for ip's delayed extent of length "len".
@@ -3983,37 +3683,6 @@ xfs_bmap_worst_indlen(
3983 return rval; 3683 return rval;
3984} 3684}
3985 3685
3986#if defined(XFS_RW_TRACE)
3987STATIC void
3988xfs_bunmap_trace(
3989 xfs_inode_t *ip,
3990 xfs_fileoff_t bno,
3991 xfs_filblks_t len,
3992 int flags,
3993 inst_t *ra)
3994{
3995 if (ip->i_rwtrace == NULL)
3996 return;
3997 ktrace_enter(ip->i_rwtrace,
3998 (void *)(__psint_t)XFS_BUNMAP,
3999 (void *)ip,
4000 (void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff),
4001 (void *)(__psint_t)(ip->i_d.di_size & 0xffffffff),
4002 (void *)(__psint_t)(((xfs_dfiloff_t)bno >> 32) & 0xffffffff),
4003 (void *)(__psint_t)((xfs_dfiloff_t)bno & 0xffffffff),
4004 (void *)(__psint_t)len,
4005 (void *)(__psint_t)flags,
4006 (void *)(unsigned long)current_cpu(),
4007 (void *)ra,
4008 (void *)0,
4009 (void *)0,
4010 (void *)0,
4011 (void *)0,
4012 (void *)0,
4013 (void *)0);
4014}
4015#endif
4016
4017/* 3686/*
4018 * Convert inode from non-attributed to attributed. 3687 * Convert inode from non-attributed to attributed.
4019 * Must not be in a transaction, ip must not be locked. 3688 * Must not be in a transaction, ip must not be locked.
@@ -4702,34 +4371,30 @@ error0:
4702 return XFS_ERROR(EFSCORRUPTED); 4371 return XFS_ERROR(EFSCORRUPTED);
4703} 4372}
4704 4373
4705#ifdef XFS_BMAP_TRACE 4374#ifdef DEBUG
4706/* 4375/*
4707 * Add bmap trace insert entries for all the contents of the extent records. 4376 * Add bmap trace insert entries for all the contents of the extent records.
4708 */ 4377 */
4709void 4378void
4710xfs_bmap_trace_exlist( 4379xfs_bmap_trace_exlist(
4711 const char *fname, /* function name */
4712 xfs_inode_t *ip, /* incore inode pointer */ 4380 xfs_inode_t *ip, /* incore inode pointer */
4713 xfs_extnum_t cnt, /* count of entries in the list */ 4381 xfs_extnum_t cnt, /* count of entries in the list */
4714 int whichfork) /* data or attr fork */ 4382 int whichfork, /* data or attr fork */
4383 unsigned long caller_ip)
4715{ 4384{
4716 xfs_bmbt_rec_host_t *ep; /* current extent record */
4717 xfs_extnum_t idx; /* extent record index */ 4385 xfs_extnum_t idx; /* extent record index */
4718 xfs_ifork_t *ifp; /* inode fork pointer */ 4386 xfs_ifork_t *ifp; /* inode fork pointer */
4719 xfs_bmbt_irec_t s; /* file extent record */ 4387 int state = 0;
4388
4389 if (whichfork == XFS_ATTR_FORK)
4390 state |= BMAP_ATTRFORK;
4720 4391
4721 ifp = XFS_IFORK_PTR(ip, whichfork); 4392 ifp = XFS_IFORK_PTR(ip, whichfork);
4722 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 4393 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4723 for (idx = 0; idx < cnt; idx++) { 4394 for (idx = 0; idx < cnt; idx++)
4724 ep = xfs_iext_get_ext(ifp, idx); 4395 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
4725 xfs_bmbt_get_all(ep, &s);
4726 XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
4727 whichfork);
4728 }
4729} 4396}
4730#endif
4731 4397
4732#ifdef DEBUG
4733/* 4398/*
4734 * Validate that the bmbt_irecs being returned from bmapi are valid 4399 * Validate that the bmbt_irecs being returned from bmapi are valid
4735 * given the callers original parameters. Specifically check the 4400 * given the callers original parameters. Specifically check the
@@ -5478,7 +5143,8 @@ xfs_bunmapi(
5478 int rsvd; /* OK to allocate reserved blocks */ 5143 int rsvd; /* OK to allocate reserved blocks */
5479 xfs_fsblock_t sum; 5144 xfs_fsblock_t sum;
5480 5145
5481 xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address); 5146 trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5147
5482 whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 5148 whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5483 XFS_ATTR_FORK : XFS_DATA_FORK; 5149 XFS_ATTR_FORK : XFS_DATA_FORK;
5484 ifp = XFS_IFORK_PTR(ip, whichfork); 5150 ifp = XFS_IFORK_PTR(ip, whichfork);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 56f62d2edc35..419dafb9d87d 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -95,6 +95,21 @@ typedef struct xfs_bmap_free
95 /* need write cache flushing and no */ 95 /* need write cache flushing and no */
96 /* additional allocation alignments */ 96 /* additional allocation alignments */
97 97
98#define XFS_BMAPI_FLAGS \
99 { XFS_BMAPI_WRITE, "WRITE" }, \
100 { XFS_BMAPI_DELAY, "DELAY" }, \
101 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
102 { XFS_BMAPI_METADATA, "METADATA" }, \
103 { XFS_BMAPI_EXACT, "EXACT" }, \
104 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
105 { XFS_BMAPI_ASYNC, "ASYNC" }, \
106 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
107 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
108 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
109 { XFS_BMAPI_CONTIG, "CONTIG" }, \
110 { XFS_BMAPI_CONVERT, "CONVERT" }
111
112
98static inline int xfs_bmapi_aflag(int w) 113static inline int xfs_bmapi_aflag(int w)
99{ 114{
100 return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0); 115 return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0);
@@ -135,36 +150,43 @@ typedef struct xfs_bmalloca {
135 char conv; /* overwriting unwritten extents */ 150 char conv; /* overwriting unwritten extents */
136} xfs_bmalloca_t; 151} xfs_bmalloca_t;
137 152
138#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
139/* 153/*
140 * Trace operations for bmap extent tracing 154 * Flags for xfs_bmap_add_extent*.
141 */ 155 */
142#define XFS_BMAP_KTRACE_DELETE 1 156#define BMAP_LEFT_CONTIG (1 << 0)
143#define XFS_BMAP_KTRACE_INSERT 2 157#define BMAP_RIGHT_CONTIG (1 << 1)
144#define XFS_BMAP_KTRACE_PRE_UP 3 158#define BMAP_LEFT_FILLING (1 << 2)
145#define XFS_BMAP_KTRACE_POST_UP 4 159#define BMAP_RIGHT_FILLING (1 << 3)
146 160#define BMAP_LEFT_DELAY (1 << 4)
147#define XFS_BMAP_TRACE_SIZE 4096 /* size of global trace buffer */ 161#define BMAP_RIGHT_DELAY (1 << 5)
148#define XFS_BMAP_KTRACE_SIZE 32 /* size of per-inode trace buffer */ 162#define BMAP_LEFT_VALID (1 << 6)
149extern ktrace_t *xfs_bmap_trace_buf; 163#define BMAP_RIGHT_VALID (1 << 7)
164#define BMAP_ATTRFORK (1 << 8)
165
166#define XFS_BMAP_EXT_FLAGS \
167 { BMAP_LEFT_CONTIG, "LC" }, \
168 { BMAP_RIGHT_CONTIG, "RC" }, \
169 { BMAP_LEFT_FILLING, "LF" }, \
170 { BMAP_RIGHT_FILLING, "RF" }, \
171 { BMAP_ATTRFORK, "ATTR" }
150 172
151/* 173/*
152 * Add bmap trace insert entries for all the contents of the extent list. 174 * Add bmap trace insert entries for all the contents of the extent list.
175 *
176 * Quite excessive tracing. Only do this for debug builds.
153 */ 177 */
178#if defined(__KERNEL) && defined(DEBUG)
154void 179void
155xfs_bmap_trace_exlist( 180xfs_bmap_trace_exlist(
156 const char *fname, /* function name */
157 struct xfs_inode *ip, /* incore inode pointer */ 181 struct xfs_inode *ip, /* incore inode pointer */
158 xfs_extnum_t cnt, /* count of entries in list */ 182 xfs_extnum_t cnt, /* count of entries in list */
159 int whichfork); /* data or attr fork */ 183 int whichfork,
184 unsigned long caller_ip); /* address of the calling code */
160#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ 185#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
161 xfs_bmap_trace_exlist(__func__,ip,c,w) 186 xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
162 187#else
163#else /* __KERNEL__ && XFS_BMAP_TRACE */
164
165#define XFS_BMAP_TRACE_EXLIST(ip,c,w) 188#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
166 189#endif
167#endif /* __KERNEL__ && XFS_BMAP_TRACE */
168 190
169/* 191/*
170 * Convert inode from non-attributed to attributed. 192 * Convert inode from non-attributed to attributed.
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 6f5ccede63f9..38751d5fac6f 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -768,12 +768,6 @@ xfs_bmbt_trace_enter(
768 (void *)a0, (void *)a1, (void *)a2, (void *)a3, 768 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
769 (void *)a4, (void *)a5, (void *)a6, (void *)a7, 769 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
770 (void *)a8, (void *)a9, (void *)a10); 770 (void *)a8, (void *)a9, (void *)a10);
771 ktrace_enter(ip->i_btrace,
772 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
773 (void *)func, (void *)s, (void *)ip, (void *)cur,
774 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
775 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
776 (void *)a8, (void *)a9, (void *)a10);
777} 771}
778 772
779STATIC void 773STATIC void
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 5549d495947f..cf07ca7c22e7 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block {
46#define BMBT_STARTBLOCK_BITLEN 52 46#define BMBT_STARTBLOCK_BITLEN 52
47#define BMBT_BLOCKCOUNT_BITLEN 21 47#define BMBT_BLOCKCOUNT_BITLEN 21
48 48
49 49typedef struct xfs_bmbt_rec {
50#define BMBT_USE_64 1
51
52typedef struct xfs_bmbt_rec_32
53{
54 __uint32_t l0, l1, l2, l3;
55} xfs_bmbt_rec_32_t;
56typedef struct xfs_bmbt_rec_64
57{
58 __be64 l0, l1; 50 __be64 l0, l1;
59} xfs_bmbt_rec_64_t; 51} xfs_bmbt_rec_t;
60 52
61typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ 53typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
62typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; 54typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
63 55
64typedef struct xfs_bmbt_rec_host { 56typedef struct xfs_bmbt_rec_host {
65 __uint64_t l0, l1; 57 __uint64_t l0, l1;
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 52b5f14d0c32..36a0992dd669 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -39,6 +39,7 @@
39#include "xfs_btree_trace.h" 39#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
42 43
43/* 44/*
44 * Cursor allocation zone. 45 * Cursor allocation zone.
@@ -81,7 +82,7 @@ xfs_btree_check_lblock(
81 XFS_ERRTAG_BTREE_CHECK_LBLOCK, 82 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
82 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 83 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
83 if (bp) 84 if (bp)
84 xfs_buftrace("LBTREE ERROR", bp); 85 trace_xfs_btree_corrupt(bp, _RET_IP_);
85 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, 86 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
86 mp); 87 mp);
87 return XFS_ERROR(EFSCORRUPTED); 88 return XFS_ERROR(EFSCORRUPTED);
@@ -119,7 +120,7 @@ xfs_btree_check_sblock(
119 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 120 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 121 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 122 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 123 trace_xfs_btree_corrupt(bp, _RET_IP_);
123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", 124 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 XFS_ERRLEVEL_LOW, cur->bc_mp, block); 125 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 126 return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
index b3f5eb3c3c6c..2d8a309873ea 100644
--- a/fs/xfs/xfs_btree_trace.h
+++ b/fs/xfs/xfs_btree_trace.h
@@ -58,8 +58,6 @@ void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
58 struct xfs_buf *, int, int); 58 struct xfs_buf *, int, int);
59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, 59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
60 struct xfs_buf *, int, int, int); 60 struct xfs_buf *, int, int, int);
61void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
62 xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
63void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); 61void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
64void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, 62void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
65 union xfs_btree_ptr, union xfs_btree_key *, int); 63 union xfs_btree_ptr, union xfs_btree_key *, int);
@@ -71,24 +69,10 @@ void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
71 union xfs_btree_rec *, int); 69 union xfs_btree_rec *, int);
72void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); 70void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
73 71
74
75#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */
76extern ktrace_t *xfs_allocbt_trace_buf;
77
78#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */
79extern ktrace_t *xfs_inobt_trace_buf;
80
81#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
82#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
83extern ktrace_t *xfs_bmbt_trace_buf;
84
85
86#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ 72#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
87 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) 73 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
88#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ 74#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
89 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) 75 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
90#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \
91 xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
92#define XFS_BTREE_TRACE_ARGI(c, i) \ 76#define XFS_BTREE_TRACE_ARGI(c, i) \
93 xfs_btree_trace_argi(__func__, c, i, __LINE__) 77 xfs_btree_trace_argi(__func__, c, i, __LINE__)
94#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ 78#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
@@ -104,7 +88,6 @@ extern ktrace_t *xfs_bmbt_trace_buf;
104#else 88#else
105#define XFS_BTREE_TRACE_ARGBI(c, b, i) 89#define XFS_BTREE_TRACE_ARGBI(c, b, i)
106#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) 90#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
107#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
108#define XFS_BTREE_TRACE_ARGI(c, i) 91#define XFS_BTREE_TRACE_ARGI(c, i)
109#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) 92#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
110#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) 93#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 92af4098c7e8..a30f7e9eb2b9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -29,6 +29,7 @@
29#include "xfs_buf_item.h" 29#include "xfs_buf_item.h"
30#include "xfs_trans_priv.h" 30#include "xfs_trans_priv.h"
31#include "xfs_error.h" 31#include "xfs_error.h"
32#include "xfs_trace.h"
32 33
33 34
34kmem_zone_t *xfs_buf_item_zone; 35kmem_zone_t *xfs_buf_item_zone;
@@ -164,7 +165,7 @@ xfs_buf_item_size(
164 * is the buf log format structure with the 165 * is the buf log format structure with the
165 * cancel flag in it. 166 * cancel flag in it.
166 */ 167 */
167 xfs_buf_item_trace("SIZE STALE", bip); 168 trace_xfs_buf_item_size_stale(bip);
168 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 169 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
169 return 1; 170 return 1;
170 } 171 }
@@ -206,7 +207,7 @@ xfs_buf_item_size(
206 } 207 }
207 } 208 }
208 209
209 xfs_buf_item_trace("SIZE NORM", bip); 210 trace_xfs_buf_item_size(bip);
210 return nvecs; 211 return nvecs;
211} 212}
212 213
@@ -259,7 +260,7 @@ xfs_buf_item_format(
259 * is the buf log format structure with the 260 * is the buf log format structure with the
260 * cancel flag in it. 261 * cancel flag in it.
261 */ 262 */
262 xfs_buf_item_trace("FORMAT STALE", bip); 263 trace_xfs_buf_item_format_stale(bip);
263 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 264 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
264 bip->bli_format.blf_size = nvecs; 265 bip->bli_format.blf_size = nvecs;
265 return; 266 return;
@@ -335,7 +336,7 @@ xfs_buf_item_format(
335 /* 336 /*
336 * Check to make sure everything is consistent. 337 * Check to make sure everything is consistent.
337 */ 338 */
338 xfs_buf_item_trace("FORMAT NORM", bip); 339 trace_xfs_buf_item_format(bip);
339 xfs_buf_item_log_check(bip); 340 xfs_buf_item_log_check(bip);
340} 341}
341 342
@@ -355,8 +356,7 @@ xfs_buf_item_pin(
355 ASSERT(atomic_read(&bip->bli_refcount) > 0); 356 ASSERT(atomic_read(&bip->bli_refcount) > 0);
356 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 357 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
357 (bip->bli_flags & XFS_BLI_STALE)); 358 (bip->bli_flags & XFS_BLI_STALE));
358 xfs_buf_item_trace("PIN", bip); 359 trace_xfs_buf_item_pin(bip);
359 xfs_buftrace("XFS_PIN", bp);
360 xfs_bpin(bp); 360 xfs_bpin(bp);
361} 361}
362 362
@@ -383,8 +383,7 @@ xfs_buf_item_unpin(
383 ASSERT(bp != NULL); 383 ASSERT(bp != NULL);
384 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 384 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
385 ASSERT(atomic_read(&bip->bli_refcount) > 0); 385 ASSERT(atomic_read(&bip->bli_refcount) > 0);
386 xfs_buf_item_trace("UNPIN", bip); 386 trace_xfs_buf_item_unpin(bip);
387 xfs_buftrace("XFS_UNPIN", bp);
388 387
389 freed = atomic_dec_and_test(&bip->bli_refcount); 388 freed = atomic_dec_and_test(&bip->bli_refcount);
390 ailp = bip->bli_item.li_ailp; 389 ailp = bip->bli_item.li_ailp;
@@ -395,8 +394,8 @@ xfs_buf_item_unpin(
395 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 394 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
396 ASSERT(XFS_BUF_ISSTALE(bp)); 395 ASSERT(XFS_BUF_ISSTALE(bp));
397 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 396 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
398 xfs_buf_item_trace("UNPIN STALE", bip); 397 trace_xfs_buf_item_unpin_stale(bip);
399 xfs_buftrace("XFS_UNPIN STALE", bp); 398
400 /* 399 /*
401 * If we get called here because of an IO error, we may 400 * If we get called here because of an IO error, we may
402 * or may not have the item on the AIL. xfs_trans_ail_delete() 401 * or may not have the item on the AIL. xfs_trans_ail_delete()
@@ -440,8 +439,8 @@ xfs_buf_item_unpin_remove(
440 if ((atomic_read(&bip->bli_refcount) == 1) && 439 if ((atomic_read(&bip->bli_refcount) == 1) &&
441 (bip->bli_flags & XFS_BLI_STALE)) { 440 (bip->bli_flags & XFS_BLI_STALE)) {
442 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); 441 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
443 xfs_buf_item_trace("UNPIN REMOVE", bip); 442 trace_xfs_buf_item_unpin_stale(bip);
444 xfs_buftrace("XFS_UNPIN_REMOVE", bp); 443
445 /* 444 /*
446 * yes -- clear the xaction descriptor in-use flag 445 * yes -- clear the xaction descriptor in-use flag
447 * and free the chunk if required. We can safely 446 * and free the chunk if required. We can safely
@@ -495,7 +494,7 @@ xfs_buf_item_trylock(
495 XFS_BUF_HOLD(bp); 494 XFS_BUF_HOLD(bp);
496 495
497 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 496 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
498 xfs_buf_item_trace("TRYLOCK SUCCESS", bip); 497 trace_xfs_buf_item_trylock(bip);
499 return XFS_ITEM_SUCCESS; 498 return XFS_ITEM_SUCCESS;
500} 499}
501 500
@@ -524,7 +523,6 @@ xfs_buf_item_unlock(
524 uint hold; 523 uint hold;
525 524
526 bp = bip->bli_buf; 525 bp = bip->bli_buf;
527 xfs_buftrace("XFS_UNLOCK", bp);
528 526
529 /* 527 /*
530 * Clear the buffer's association with this transaction. 528 * Clear the buffer's association with this transaction.
@@ -547,7 +545,7 @@ xfs_buf_item_unlock(
547 */ 545 */
548 if (bip->bli_flags & XFS_BLI_STALE) { 546 if (bip->bli_flags & XFS_BLI_STALE) {
549 bip->bli_flags &= ~XFS_BLI_LOGGED; 547 bip->bli_flags &= ~XFS_BLI_LOGGED;
550 xfs_buf_item_trace("UNLOCK STALE", bip); 548 trace_xfs_buf_item_unlock_stale(bip);
551 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 549 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
552 if (!aborted) 550 if (!aborted)
553 return; 551 return;
@@ -574,7 +572,7 @@ xfs_buf_item_unlock(
574 * release the buffer at the end of this routine. 572 * release the buffer at the end of this routine.
575 */ 573 */
576 hold = bip->bli_flags & XFS_BLI_HOLD; 574 hold = bip->bli_flags & XFS_BLI_HOLD;
577 xfs_buf_item_trace("UNLOCK", bip); 575 trace_xfs_buf_item_unlock(bip);
578 576
579 /* 577 /*
580 * If the buf item isn't tracking any data, free it. 578 * If the buf item isn't tracking any data, free it.
@@ -618,7 +616,8 @@ xfs_buf_item_committed(
618 xfs_buf_log_item_t *bip, 616 xfs_buf_log_item_t *bip,
619 xfs_lsn_t lsn) 617 xfs_lsn_t lsn)
620{ 618{
621 xfs_buf_item_trace("COMMITTED", bip); 619 trace_xfs_buf_item_committed(bip);
620
622 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 621 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
623 (bip->bli_item.li_lsn != 0)) { 622 (bip->bli_item.li_lsn != 0)) {
624 return bip->bli_item.li_lsn; 623 return bip->bli_item.li_lsn;
@@ -640,7 +639,7 @@ xfs_buf_item_push(
640 xfs_buf_t *bp; 639 xfs_buf_t *bp;
641 640
642 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 641 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
643 xfs_buf_item_trace("PUSH", bip); 642 trace_xfs_buf_item_push(bip);
644 643
645 bp = bip->bli_buf; 644 bp = bip->bli_buf;
646 645
@@ -738,9 +737,6 @@ xfs_buf_item_init(
738 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); 737 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
739 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); 738 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
740 bip->bli_format.blf_map_size = map_size; 739 bip->bli_format.blf_map_size = map_size;
741#ifdef XFS_BLI_TRACE
742 bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);
743#endif
744 740
745#ifdef XFS_TRANS_DEBUG 741#ifdef XFS_TRANS_DEBUG
746 /* 742 /*
@@ -878,9 +874,6 @@ xfs_buf_item_free(
878 kmem_free(bip->bli_logged); 874 kmem_free(bip->bli_logged);
879#endif /* XFS_TRANS_DEBUG */ 875#endif /* XFS_TRANS_DEBUG */
880 876
881#ifdef XFS_BLI_TRACE
882 ktrace_free(bip->bli_trace);
883#endif
884 kmem_zone_free(xfs_buf_item_zone, bip); 877 kmem_zone_free(xfs_buf_item_zone, bip);
885} 878}
886 879
@@ -897,7 +890,8 @@ xfs_buf_item_relse(
897{ 890{
898 xfs_buf_log_item_t *bip; 891 xfs_buf_log_item_t *bip;
899 892
900 xfs_buftrace("XFS_RELSE", bp); 893 trace_xfs_buf_item_relse(bp, _RET_IP_);
894
901 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 895 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
902 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); 896 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
903 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && 897 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
@@ -994,7 +988,7 @@ xfs_buf_iodone_callbacks(
994 if (XFS_FORCED_SHUTDOWN(mp)) { 988 if (XFS_FORCED_SHUTDOWN(mp)) {
995 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); 989 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
996 XFS_BUF_SUPER_STALE(bp); 990 XFS_BUF_SUPER_STALE(bp);
997 xfs_buftrace("BUF_IODONE_CB", bp); 991 trace_xfs_buf_item_iodone(bp, _RET_IP_);
998 xfs_buf_do_callbacks(bp, lip); 992 xfs_buf_do_callbacks(bp, lip);
999 XFS_BUF_SET_FSPRIVATE(bp, NULL); 993 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1000 XFS_BUF_CLR_IODONE_FUNC(bp); 994 XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -1030,7 +1024,7 @@ xfs_buf_iodone_callbacks(
1030 XFS_BUF_SET_START(bp); 1024 XFS_BUF_SET_START(bp);
1031 } 1025 }
1032 ASSERT(XFS_BUF_IODONE_FUNC(bp)); 1026 ASSERT(XFS_BUF_IODONE_FUNC(bp));
1033 xfs_buftrace("BUF_IODONE ASYNC", bp); 1027 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
1034 xfs_buf_relse(bp); 1028 xfs_buf_relse(bp);
1035 } else { 1029 } else {
1036 /* 1030 /*
@@ -1053,9 +1047,7 @@ xfs_buf_iodone_callbacks(
1053 } 1047 }
1054 return; 1048 return;
1055 } 1049 }
1056#ifdef XFSERRORDEBUG 1050
1057 xfs_buftrace("XFS BUFCB NOERR", bp);
1058#endif
1059 xfs_buf_do_callbacks(bp, lip); 1051 xfs_buf_do_callbacks(bp, lip);
1060 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1052 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1061 XFS_BUF_CLR_IODONE_FUNC(bp); 1053 XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -1081,7 +1073,9 @@ xfs_buf_error_relse(
1081 XFS_BUF_DONE(bp); 1073 XFS_BUF_DONE(bp);
1082 XFS_BUF_UNDELAYWRITE(bp); 1074 XFS_BUF_UNDELAYWRITE(bp);
1083 XFS_BUF_ERROR(bp,0); 1075 XFS_BUF_ERROR(bp,0);
1084 xfs_buftrace("BUF_ERROR_RELSE", bp); 1076
1077 trace_xfs_buf_error_relse(bp, _RET_IP_);
1078
1085 if (! XFS_FORCED_SHUTDOWN(mp)) 1079 if (! XFS_FORCED_SHUTDOWN(mp))
1086 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1080 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1087 /* 1081 /*
@@ -1128,34 +1122,3 @@ xfs_buf_iodone(
1128 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 1122 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
1129 xfs_buf_item_free(bip); 1123 xfs_buf_item_free(bip);
1130} 1124}
1131
1132#if defined(XFS_BLI_TRACE)
1133void
1134xfs_buf_item_trace(
1135 char *id,
1136 xfs_buf_log_item_t *bip)
1137{
1138 xfs_buf_t *bp;
1139 ASSERT(bip->bli_trace != NULL);
1140
1141 bp = bip->bli_buf;
1142 ktrace_enter(bip->bli_trace,
1143 (void *)id,
1144 (void *)bip->bli_buf,
1145 (void *)((unsigned long)bip->bli_flags),
1146 (void *)((unsigned long)bip->bli_recur),
1147 (void *)((unsigned long)atomic_read(&bip->bli_refcount)),
1148 (void *)((unsigned long)
1149 (0xFFFFFFFF & XFS_BUF_ADDR(bp) >> 32)),
1150 (void *)((unsigned long)(0xFFFFFFFF & XFS_BUF_ADDR(bp))),
1151 (void *)((unsigned long)XFS_BUF_COUNT(bp)),
1152 (void *)((unsigned long)XFS_BUF_BFLAGS(bp)),
1153 XFS_BUF_FSPRIVATE(bp, void *),
1154 XFS_BUF_FSPRIVATE2(bp, void *),
1155 (void *)(unsigned long)XFS_BUF_ISPINNED(bp),
1156 (void *)XFS_BUF_IODONE_FUNC(bp),
1157 (void *)((unsigned long)(XFS_BUF_VALUSEMA(bp))),
1158 (void *)bip->bli_item.li_desc,
1159 (void *)((unsigned long)bip->bli_item.li_flags));
1160}
1161#endif /* XFS_BLI_TRACE */
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 5a41c348bb1c..217f34af00cb 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -70,22 +70,21 @@ typedef struct xfs_buf_log_format_t {
70#define XFS_BLI_INODE_ALLOC_BUF 0x10 70#define XFS_BLI_INODE_ALLOC_BUF 0x10
71#define XFS_BLI_STALE_INODE 0x20 71#define XFS_BLI_STALE_INODE 0x20
72 72
73#define XFS_BLI_FLAGS \
74 { XFS_BLI_HOLD, "HOLD" }, \
75 { XFS_BLI_DIRTY, "DIRTY" }, \
76 { XFS_BLI_STALE, "STALE" }, \
77 { XFS_BLI_LOGGED, "LOGGED" }, \
78 { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
79 { XFS_BLI_STALE_INODE, "STALE_INODE" }
80
73 81
74#ifdef __KERNEL__ 82#ifdef __KERNEL__
75 83
76struct xfs_buf; 84struct xfs_buf;
77struct ktrace;
78struct xfs_mount; 85struct xfs_mount;
79struct xfs_buf_log_item; 86struct xfs_buf_log_item;
80 87
81#if defined(XFS_BLI_TRACE)
82#define XFS_BLI_TRACE_SIZE 32
83
84void xfs_buf_item_trace(char *, struct xfs_buf_log_item *);
85#else
86#define xfs_buf_item_trace(id, bip)
87#endif
88
89/* 88/*
90 * This is the in core log item structure used to track information 89 * This is the in core log item structure used to track information
91 * needed to log buffers. It tracks how many times the lock has been 90 * needed to log buffers. It tracks how many times the lock has been
@@ -97,9 +96,6 @@ typedef struct xfs_buf_log_item {
97 unsigned int bli_flags; /* misc flags */ 96 unsigned int bli_flags; /* misc flags */
98 unsigned int bli_recur; /* lock recursion count */ 97 unsigned int bli_recur; /* lock recursion count */
99 atomic_t bli_refcount; /* cnt of tp refs */ 98 atomic_t bli_refcount; /* cnt of tp refs */
100#ifdef XFS_BLI_TRACE
101 struct ktrace *bli_trace; /* event trace buf */
102#endif
103#ifdef XFS_TRANS_DEBUG 99#ifdef XFS_TRANS_DEBUG
104 char *bli_orig; /* original buffer copy */ 100 char *bli_orig; /* original buffer copy */
105 char *bli_logged; /* bytes logged (bitmap) */ 101 char *bli_logged; /* bytes logged (bitmap) */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 2847bbc1c534..c0c8869115b1 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -46,6 +46,7 @@
46#include "xfs_dir2_block.h" 46#include "xfs_dir2_block.h"
47#include "xfs_dir2_node.h" 47#include "xfs_dir2_node.h"
48#include "xfs_error.h" 48#include "xfs_error.h"
49#include "xfs_trace.h"
49 50
50/* 51/*
51 * xfs_da_btree.c 52 * xfs_da_btree.c
@@ -2107,7 +2108,7 @@ xfs_da_do_buf(
2107 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), 2108 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
2108 mp, XFS_ERRTAG_DA_READ_BUF, 2109 mp, XFS_ERRTAG_DA_READ_BUF,
2109 XFS_RANDOM_DA_READ_BUF))) { 2110 XFS_RANDOM_DA_READ_BUF))) {
2110 xfs_buftrace("DA READ ERROR", rbp->bps[0]); 2111 trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
2111 XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", 2112 XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2112 XFS_ERRLEVEL_LOW, mp, info); 2113 XFS_ERRLEVEL_LOW, mp, info);
2113 error = XFS_ERROR(EFSCORRUPTED); 2114 error = XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8c536167bf75..30cd08f56a3a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -125,6 +125,13 @@ typedef struct xfs_da_args {
125#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ 125#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
126#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ 126#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
127 127
128#define XFS_DA_OP_FLAGS \
129 { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
130 { XFS_DA_OP_RENAME, "RENAME" }, \
131 { XFS_DA_OP_ADDNAME, "ADDNAME" }, \
132 { XFS_DA_OP_OKNOENT, "OKNOENT" }, \
133 { XFS_DA_OP_CILOOKUP, "CILOOKUP" }
134
128/* 135/*
129 * Structure to describe buffer(s) for a block. 136 * Structure to describe buffer(s) for a block.
130 * This is needed in the directory version 2 format case, when 137 * This is needed in the directory version 2 format case, when
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index ab89a7e94a0f..84ca1cf16a1e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -43,6 +43,7 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_vnodeops.h" 45#include "xfs_vnodeops.h"
46#include "xfs_trace.h"
46 47
47/* 48/*
48 * Syssgi interface for swapext 49 * Syssgi interface for swapext
@@ -113,10 +114,82 @@ xfs_swapext(
113 return error; 114 return error;
114} 115}
115 116
117/*
118 * We need to check that the format of the data fork in the temporary inode is
119 * valid for the target inode before doing the swap. This is not a problem with
120 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
121 * data fork depending on the space the attribute fork is taking so we can get
122 * invalid formats on the target inode.
123 *
124 * E.g. target has space for 7 extents in extent format, temp inode only has
125 * space for 6. If we defragment down to 7 extents, then the tmp format is a
126 * btree, but when swapped it needs to be in extent format. Hence we can't just
127 * blindly swap data forks on attr2 filesystems.
128 *
129 * Note that we check the swap in both directions so that we don't end up with
130 * a corrupt temporary inode, either.
131 *
132 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
133 * inode will prevent this situation from occurring, so all we do here is
134 * reject and log the attempt. Basically, we are putting the responsibility on
135 * userspace to get this right.
136 */
137static int
138xfs_swap_extents_check_format(
139 xfs_inode_t *ip, /* target inode */
140 xfs_inode_t *tip) /* tmp inode */
141{
142
143 /* Should never get a local format */
144 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
145 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
146 return EINVAL;
147
148 /*
149 * if the target inode has fewer extents than the temporary inode then
150 * why did userspace call us?
151 */
152 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
153 return EINVAL;
154
155 /*
156 * if the target inode is in extent form and the temp inode is in btree
157 * form then we will end up with the target inode in the wrong format
158 * as we already know there are fewer extents in the temp inode.
159 */
160 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
161 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
162 return EINVAL;
163
164 /* Check temp in extent form to max in target */
165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
167 return EINVAL;
168
169 /* Check target in extent form to max in temp */
170 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
171 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
172 return EINVAL;
173
174 /* Check root block of temp in btree form to max in target */
175 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
176 XFS_IFORK_BOFF(ip) &&
177 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
178 return EINVAL;
179
180 /* Check root block of target in btree form to max in temp */
181 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(tip) &&
183 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
184 return EINVAL;
185
186 return 0;
187}
188
116int 189int
117xfs_swap_extents( 190xfs_swap_extents(
118 xfs_inode_t *ip, 191 xfs_inode_t *ip, /* target inode */
119 xfs_inode_t *tip, 192 xfs_inode_t *tip, /* tmp inode */
120 xfs_swapext_t *sxp) 193 xfs_swapext_t *sxp)
121{ 194{
122 xfs_mount_t *mp; 195 xfs_mount_t *mp;
@@ -160,15 +233,7 @@ xfs_swap_extents(
160 goto out_unlock; 233 goto out_unlock;
161 } 234 }
162 235
163 /* Should never get a local format */
164 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
165 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
166 error = XFS_ERROR(EINVAL);
167 goto out_unlock;
168 }
169
170 if (VN_CACHED(VFS_I(tip)) != 0) { 236 if (VN_CACHED(VFS_I(tip)) != 0) {
171 xfs_inval_cached_trace(tip, 0, -1, 0, -1);
172 error = xfs_flushinval_pages(tip, 0, -1, 237 error = xfs_flushinval_pages(tip, 0, -1,
173 FI_REMAPF_LOCKED); 238 FI_REMAPF_LOCKED);
174 if (error) 239 if (error)
@@ -189,13 +254,12 @@ xfs_swap_extents(
189 goto out_unlock; 254 goto out_unlock;
190 } 255 }
191 256
192 /* 257 /* check inode formats now that data is flushed */
193 * If the target has extended attributes, the tmp file 258 error = xfs_swap_extents_check_format(ip, tip);
194 * must also in order to ensure the correct data fork 259 if (error) {
195 * format. 260 xfs_fs_cmn_err(CE_NOTE, mp,
196 */ 261 "%s: inode 0x%llx format is incompatible for exchanging.",
197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 262 __FILE__, ip->i_ino);
198 error = XFS_ERROR(EINVAL);
199 goto out_unlock; 263 goto out_unlock;
200 } 264 }
201 265
@@ -276,6 +340,16 @@ xfs_swap_extents(
276 *tifp = *tempifp; /* struct copy */ 340 *tifp = *tempifp; /* struct copy */
277 341
278 /* 342 /*
343 * Fix the in-memory data fork values that are dependent on the fork
344 * offset in the inode. We can't assume they remain the same as attr2
345 * has dynamic fork offsets.
346 */
347 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
348 (uint)sizeof(xfs_bmbt_rec_t);
349 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
350 (uint)sizeof(xfs_bmbt_rec_t);
351
352 /*
279 * Fix the on-disk inode values 353 * Fix the on-disk inode values
280 */ 354 */
281 tmp = (__uint64_t)ip->i_d.di_nblocks; 355 tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index bb1d58eb3982..93634a7e90e9 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -40,9 +40,9 @@
40#include "xfs_dir2_leaf.h" 40#include "xfs_dir2_leaf.h"
41#include "xfs_dir2_block.h" 41#include "xfs_dir2_block.h"
42#include "xfs_dir2_node.h" 42#include "xfs_dir2_node.h"
43#include "xfs_dir2_trace.h"
44#include "xfs_error.h" 43#include "xfs_error.h"
45#include "xfs_vnodeops.h" 44#include "xfs_vnodeops.h"
45#include "xfs_trace.h"
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = {"..", 2};
48 48
@@ -525,7 +525,8 @@ xfs_dir2_grow_inode(
525 xfs_trans_t *tp; 525 xfs_trans_t *tp;
526 xfs_drfsbno_t nblks; 526 xfs_drfsbno_t nblks;
527 527
528 xfs_dir2_trace_args_s("grow_inode", args, space); 528 trace_xfs_dir2_grow_inode(args, space);
529
529 dp = args->dp; 530 dp = args->dp;
530 tp = args->trans; 531 tp = args->trans;
531 mp = dp->i_mount; 532 mp = dp->i_mount;
@@ -703,7 +704,8 @@ xfs_dir2_shrink_inode(
703 xfs_mount_t *mp; 704 xfs_mount_t *mp;
704 xfs_trans_t *tp; 705 xfs_trans_t *tp;
705 706
706 xfs_dir2_trace_args_db("shrink_inode", args, db, bp); 707 trace_xfs_dir2_shrink_inode(args, db);
708
707 dp = args->dp; 709 dp = args->dp;
708 mp = dp->i_mount; 710 mp = dp->i_mount;
709 tp = args->trans; 711 tp = args->trans;
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index ab52e9e1c1ee..ddc4ecc7807f 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -36,8 +36,8 @@
36#include "xfs_dir2_data.h" 36#include "xfs_dir2_data.h"
37#include "xfs_dir2_leaf.h" 37#include "xfs_dir2_leaf.h"
38#include "xfs_dir2_block.h" 38#include "xfs_dir2_block.h"
39#include "xfs_dir2_trace.h"
40#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_trace.h"
41 41
42/* 42/*
43 * Local function prototypes. 43 * Local function prototypes.
@@ -94,7 +94,8 @@ xfs_dir2_block_addname(
94 __be16 *tagp; /* pointer to tag value */ 94 __be16 *tagp; /* pointer to tag value */
95 xfs_trans_t *tp; /* transaction structure */ 95 xfs_trans_t *tp; /* transaction structure */
96 96
97 xfs_dir2_trace_args("block_addname", args); 97 trace_xfs_dir2_block_addname(args);
98
98 dp = args->dp; 99 dp = args->dp;
99 tp = args->trans; 100 tp = args->trans;
100 mp = dp->i_mount; 101 mp = dp->i_mount;
@@ -590,7 +591,8 @@ xfs_dir2_block_lookup(
590 int error; /* error return value */ 591 int error; /* error return value */
591 xfs_mount_t *mp; /* filesystem mount point */ 592 xfs_mount_t *mp; /* filesystem mount point */
592 593
593 xfs_dir2_trace_args("block_lookup", args); 594 trace_xfs_dir2_block_lookup(args);
595
594 /* 596 /*
595 * Get the buffer, look up the entry. 597 * Get the buffer, look up the entry.
596 * If not found (ENOENT) then return, have no buffer. 598 * If not found (ENOENT) then return, have no buffer.
@@ -747,7 +749,8 @@ xfs_dir2_block_removename(
747 int size; /* shortform size */ 749 int size; /* shortform size */
748 xfs_trans_t *tp; /* transaction pointer */ 750 xfs_trans_t *tp; /* transaction pointer */
749 751
750 xfs_dir2_trace_args("block_removename", args); 752 trace_xfs_dir2_block_removename(args);
753
751 /* 754 /*
752 * Look up the entry in the block. Gets the buffer and entry index. 755 * Look up the entry in the block. Gets the buffer and entry index.
753 * It will always be there, the vnodeops level does a lookup first. 756 * It will always be there, the vnodeops level does a lookup first.
@@ -823,7 +826,8 @@ xfs_dir2_block_replace(
823 int error; /* error return value */ 826 int error; /* error return value */
824 xfs_mount_t *mp; /* filesystem mount point */ 827 xfs_mount_t *mp; /* filesystem mount point */
825 828
826 xfs_dir2_trace_args("block_replace", args); 829 trace_xfs_dir2_block_replace(args);
830
827 /* 831 /*
828 * Lookup the entry in the directory. Get buffer and entry index. 832 * Lookup the entry in the directory. Get buffer and entry index.
829 * This will always succeed since the caller has already done a lookup. 833 * This will always succeed since the caller has already done a lookup.
@@ -897,7 +901,8 @@ xfs_dir2_leaf_to_block(
897 int to; /* block/leaf to index */ 901 int to; /* block/leaf to index */
898 xfs_trans_t *tp; /* transaction pointer */ 902 xfs_trans_t *tp; /* transaction pointer */
899 903
900 xfs_dir2_trace_args_bb("leaf_to_block", args, lbp, dbp); 904 trace_xfs_dir2_leaf_to_block(args);
905
901 dp = args->dp; 906 dp = args->dp;
902 tp = args->trans; 907 tp = args->trans;
903 mp = dp->i_mount; 908 mp = dp->i_mount;
@@ -1044,7 +1049,8 @@ xfs_dir2_sf_to_block(
1044 xfs_trans_t *tp; /* transaction pointer */ 1049 xfs_trans_t *tp; /* transaction pointer */
1045 struct xfs_name name; 1050 struct xfs_name name;
1046 1051
1047 xfs_dir2_trace_args("sf_to_block", args); 1052 trace_xfs_dir2_sf_to_block(args);
1053
1048 dp = args->dp; 1054 dp = args->dp;
1049 tp = args->trans; 1055 tp = args->trans;
1050 mp = dp->i_mount; 1056 mp = dp->i_mount;
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 41ad537c49e9..29f484c11b3a 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -38,8 +38,8 @@
38#include "xfs_dir2_leaf.h" 38#include "xfs_dir2_leaf.h"
39#include "xfs_dir2_block.h" 39#include "xfs_dir2_block.h"
40#include "xfs_dir2_node.h" 40#include "xfs_dir2_node.h"
41#include "xfs_dir2_trace.h"
42#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
43 43
44/* 44/*
45 * Local function declarations. 45 * Local function declarations.
@@ -80,7 +80,8 @@ xfs_dir2_block_to_leaf(
80 int needscan; /* need to rescan bestfree */ 80 int needscan; /* need to rescan bestfree */
81 xfs_trans_t *tp; /* transaction pointer */ 81 xfs_trans_t *tp; /* transaction pointer */
82 82
83 xfs_dir2_trace_args_b("block_to_leaf", args, dbp); 83 trace_xfs_dir2_block_to_leaf(args);
84
84 dp = args->dp; 85 dp = args->dp;
85 mp = dp->i_mount; 86 mp = dp->i_mount;
86 tp = args->trans; 87 tp = args->trans;
@@ -188,7 +189,8 @@ xfs_dir2_leaf_addname(
188 xfs_trans_t *tp; /* transaction pointer */ 189 xfs_trans_t *tp; /* transaction pointer */
189 xfs_dir2_db_t use_block; /* data block number */ 190 xfs_dir2_db_t use_block; /* data block number */
190 191
191 xfs_dir2_trace_args("leaf_addname", args); 192 trace_xfs_dir2_leaf_addname(args);
193
192 dp = args->dp; 194 dp = args->dp;
193 tp = args->trans; 195 tp = args->trans;
194 mp = dp->i_mount; 196 mp = dp->i_mount;
@@ -1266,7 +1268,8 @@ xfs_dir2_leaf_lookup(
1266 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1268 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1267 xfs_trans_t *tp; /* transaction pointer */ 1269 xfs_trans_t *tp; /* transaction pointer */
1268 1270
1269 xfs_dir2_trace_args("leaf_lookup", args); 1271 trace_xfs_dir2_leaf_lookup(args);
1272
1270 /* 1273 /*
1271 * Look up name in the leaf block, returning both buffers and index. 1274 * Look up name in the leaf block, returning both buffers and index.
1272 */ 1275 */
@@ -1454,7 +1457,8 @@ xfs_dir2_leaf_removename(
1454 xfs_dir2_data_off_t oldbest; /* old value of best free */ 1457 xfs_dir2_data_off_t oldbest; /* old value of best free */
1455 xfs_trans_t *tp; /* transaction pointer */ 1458 xfs_trans_t *tp; /* transaction pointer */
1456 1459
1457 xfs_dir2_trace_args("leaf_removename", args); 1460 trace_xfs_dir2_leaf_removename(args);
1461
1458 /* 1462 /*
1459 * Lookup the leaf entry, get the leaf and data blocks read in. 1463 * Lookup the leaf entry, get the leaf and data blocks read in.
1460 */ 1464 */
@@ -1586,7 +1590,8 @@ xfs_dir2_leaf_replace(
1586 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1590 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1587 xfs_trans_t *tp; /* transaction pointer */ 1591 xfs_trans_t *tp; /* transaction pointer */
1588 1592
1589 xfs_dir2_trace_args("leaf_replace", args); 1593 trace_xfs_dir2_leaf_replace(args);
1594
1590 /* 1595 /*
1591 * Look up the entry. 1596 * Look up the entry.
1592 */ 1597 */
@@ -1766,7 +1771,9 @@ xfs_dir2_node_to_leaf(
1766 if (state->path.active > 1) 1771 if (state->path.active > 1)
1767 return 0; 1772 return 0;
1768 args = state->args; 1773 args = state->args;
1769 xfs_dir2_trace_args("node_to_leaf", args); 1774
1775 trace_xfs_dir2_node_to_leaf(args);
1776
1770 mp = state->mp; 1777 mp = state->mp;
1771 dp = args->dp; 1778 dp = args->dp;
1772 tp = args->trans; 1779 tp = args->trans;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5a81ccd1045b..ce6e355199b5 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -37,8 +37,8 @@
37#include "xfs_dir2_leaf.h" 37#include "xfs_dir2_leaf.h"
38#include "xfs_dir2_block.h" 38#include "xfs_dir2_block.h"
39#include "xfs_dir2_node.h" 39#include "xfs_dir2_node.h"
40#include "xfs_dir2_trace.h"
41#include "xfs_error.h" 40#include "xfs_error.h"
41#include "xfs_trace.h"
42 42
43/* 43/*
44 * Function declarations. 44 * Function declarations.
@@ -123,7 +123,8 @@ xfs_dir2_leaf_to_node(
123 __be16 *to; /* pointer to freespace entry */ 123 __be16 *to; /* pointer to freespace entry */
124 xfs_trans_t *tp; /* transaction pointer */ 124 xfs_trans_t *tp; /* transaction pointer */
125 125
126 xfs_dir2_trace_args_b("leaf_to_node", args, lbp); 126 trace_xfs_dir2_leaf_to_node(args);
127
127 dp = args->dp; 128 dp = args->dp;
128 mp = dp->i_mount; 129 mp = dp->i_mount;
129 tp = args->trans; 130 tp = args->trans;
@@ -196,7 +197,8 @@ xfs_dir2_leafn_add(
196 xfs_mount_t *mp; /* filesystem mount point */ 197 xfs_mount_t *mp; /* filesystem mount point */
197 xfs_trans_t *tp; /* transaction pointer */ 198 xfs_trans_t *tp; /* transaction pointer */
198 199
199 xfs_dir2_trace_args_sb("leafn_add", args, index, bp); 200 trace_xfs_dir2_leafn_add(args, index);
201
200 dp = args->dp; 202 dp = args->dp;
201 mp = dp->i_mount; 203 mp = dp->i_mount;
202 tp = args->trans; 204 tp = args->trans;
@@ -711,8 +713,8 @@ xfs_dir2_leafn_moveents(
711 int stale; /* count stale leaves copied */ 713 int stale; /* count stale leaves copied */
712 xfs_trans_t *tp; /* transaction pointer */ 714 xfs_trans_t *tp; /* transaction pointer */
713 715
714 xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d, 716 trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
715 start_d, count); 717
716 /* 718 /*
717 * Silently return if nothing to do. 719 * Silently return if nothing to do.
718 */ 720 */
@@ -933,7 +935,8 @@ xfs_dir2_leafn_remove(
933 int needscan; /* need to rescan data frees */ 935 int needscan; /* need to rescan data frees */
934 xfs_trans_t *tp; /* transaction pointer */ 936 xfs_trans_t *tp; /* transaction pointer */
935 937
936 xfs_dir2_trace_args_sb("leafn_remove", args, index, bp); 938 trace_xfs_dir2_leafn_remove(args, index);
939
937 dp = args->dp; 940 dp = args->dp;
938 tp = args->trans; 941 tp = args->trans;
939 mp = dp->i_mount; 942 mp = dp->i_mount;
@@ -1363,7 +1366,8 @@ xfs_dir2_node_addname(
1363 int rval; /* sub-return value */ 1366 int rval; /* sub-return value */
1364 xfs_da_state_t *state; /* btree cursor */ 1367 xfs_da_state_t *state; /* btree cursor */
1365 1368
1366 xfs_dir2_trace_args("node_addname", args); 1369 trace_xfs_dir2_node_addname(args);
1370
1367 /* 1371 /*
1368 * Allocate and initialize the state (btree cursor). 1372 * Allocate and initialize the state (btree cursor).
1369 */ 1373 */
@@ -1822,7 +1826,8 @@ xfs_dir2_node_lookup(
1822 int rval; /* operation return value */ 1826 int rval; /* operation return value */
1823 xfs_da_state_t *state; /* btree cursor */ 1827 xfs_da_state_t *state; /* btree cursor */
1824 1828
1825 xfs_dir2_trace_args("node_lookup", args); 1829 trace_xfs_dir2_node_lookup(args);
1830
1826 /* 1831 /*
1827 * Allocate and initialize the btree cursor. 1832 * Allocate and initialize the btree cursor.
1828 */ 1833 */
@@ -1875,7 +1880,8 @@ xfs_dir2_node_removename(
1875 int rval; /* operation return value */ 1880 int rval; /* operation return value */
1876 xfs_da_state_t *state; /* btree cursor */ 1881 xfs_da_state_t *state; /* btree cursor */
1877 1882
1878 xfs_dir2_trace_args("node_removename", args); 1883 trace_xfs_dir2_node_removename(args);
1884
1879 /* 1885 /*
1880 * Allocate and initialize the btree cursor. 1886 * Allocate and initialize the btree cursor.
1881 */ 1887 */
@@ -1944,7 +1950,8 @@ xfs_dir2_node_replace(
1944 int rval; /* internal return value */ 1950 int rval; /* internal return value */
1945 xfs_da_state_t *state; /* btree cursor */ 1951 xfs_da_state_t *state; /* btree cursor */
1946 1952
1947 xfs_dir2_trace_args("node_replace", args); 1953 trace_xfs_dir2_node_replace(args);
1954
1948 /* 1955 /*
1949 * Allocate and initialize the btree cursor. 1956 * Allocate and initialize the btree cursor.
1950 */ 1957 */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index e89734e84646..9d4f17a69676 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -37,7 +37,7 @@
37#include "xfs_dir2_data.h" 37#include "xfs_dir2_data.h"
38#include "xfs_dir2_leaf.h" 38#include "xfs_dir2_leaf.h"
39#include "xfs_dir2_block.h" 39#include "xfs_dir2_block.h"
40#include "xfs_dir2_trace.h" 40#include "xfs_trace.h"
41 41
42/* 42/*
43 * Prototypes for internal functions. 43 * Prototypes for internal functions.
@@ -169,7 +169,8 @@ xfs_dir2_block_to_sf(
169 xfs_dir2_sf_t *sfp; /* shortform structure */ 169 xfs_dir2_sf_t *sfp; /* shortform structure */
170 xfs_ino_t temp; 170 xfs_ino_t temp;
171 171
172 xfs_dir2_trace_args_sb("block_to_sf", args, size, bp); 172 trace_xfs_dir2_block_to_sf(args);
173
173 dp = args->dp; 174 dp = args->dp;
174 mp = dp->i_mount; 175 mp = dp->i_mount;
175 176
@@ -281,7 +282,8 @@ xfs_dir2_sf_addname(
281 xfs_dir2_sf_t *sfp; /* shortform structure */ 282 xfs_dir2_sf_t *sfp; /* shortform structure */
282 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ 283 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
283 284
284 xfs_dir2_trace_args("sf_addname", args); 285 trace_xfs_dir2_sf_addname(args);
286
285 ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); 287 ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
286 dp = args->dp; 288 dp = args->dp;
287 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 289 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -654,7 +656,8 @@ xfs_dir2_sf_create(
654 xfs_dir2_sf_t *sfp; /* shortform structure */ 656 xfs_dir2_sf_t *sfp; /* shortform structure */
655 int size; /* directory size */ 657 int size; /* directory size */
656 658
657 xfs_dir2_trace_args_i("sf_create", args, pino); 659 trace_xfs_dir2_sf_create(args);
660
658 dp = args->dp; 661 dp = args->dp;
659 662
660 ASSERT(dp != NULL); 663 ASSERT(dp != NULL);
@@ -808,7 +811,8 @@ xfs_dir2_sf_lookup(
808 enum xfs_dacmp cmp; /* comparison result */ 811 enum xfs_dacmp cmp; /* comparison result */
809 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ 812 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
810 813
811 xfs_dir2_trace_args("sf_lookup", args); 814 trace_xfs_dir2_sf_lookup(args);
815
812 xfs_dir2_sf_check(args); 816 xfs_dir2_sf_check(args);
813 dp = args->dp; 817 dp = args->dp;
814 818
@@ -891,7 +895,8 @@ xfs_dir2_sf_removename(
891 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 895 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
892 xfs_dir2_sf_t *sfp; /* shortform structure */ 896 xfs_dir2_sf_t *sfp; /* shortform structure */
893 897
894 xfs_dir2_trace_args("sf_removename", args); 898 trace_xfs_dir2_sf_removename(args);
899
895 dp = args->dp; 900 dp = args->dp;
896 901
897 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 902 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -982,7 +987,8 @@ xfs_dir2_sf_replace(
982 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 987 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
983 xfs_dir2_sf_t *sfp; /* shortform structure */ 988 xfs_dir2_sf_t *sfp; /* shortform structure */
984 989
985 xfs_dir2_trace_args("sf_replace", args); 990 trace_xfs_dir2_sf_replace(args);
991
986 dp = args->dp; 992 dp = args->dp;
987 993
988 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 994 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -1125,7 +1131,8 @@ xfs_dir2_sf_toino4(
1125 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1131 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1126 xfs_dir2_sf_t *sfp; /* new sf directory */ 1132 xfs_dir2_sf_t *sfp; /* new sf directory */
1127 1133
1128 xfs_dir2_trace_args("sf_toino4", args); 1134 trace_xfs_dir2_sf_toino4(args);
1135
1129 dp = args->dp; 1136 dp = args->dp;
1130 1137
1131 /* 1138 /*
@@ -1202,7 +1209,8 @@ xfs_dir2_sf_toino8(
1202 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1209 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1203 xfs_dir2_sf_t *sfp; /* new sf directory */ 1210 xfs_dir2_sf_t *sfp; /* new sf directory */
1204 1211
1205 xfs_dir2_trace_args("sf_toino8", args); 1212 trace_xfs_dir2_sf_toino8(args);
1213
1206 dp = args->dp; 1214 dp = args->dp;
1207 1215
1208 /* 1216 /*
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
deleted file mode 100644
index 6cc7c0c681ac..000000000000
--- a/fs/xfs/xfs_dir2_trace.c
+++ /dev/null
@@ -1,216 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_inum.h"
22#include "xfs_dir2.h"
23#include "xfs_da_btree.h"
24#include "xfs_bmap_btree.h"
25#include "xfs_dir2_sf.h"
26#include "xfs_attr_sf.h"
27#include "xfs_dinode.h"
28#include "xfs_inode.h"
29#include "xfs_dir2_trace.h"
30
31#ifdef XFS_DIR2_TRACE
32ktrace_t *xfs_dir2_trace_buf;
33
34/*
35 * Enter something in the trace buffers.
36 */
37static void
38xfs_dir2_trace_enter(
39 xfs_inode_t *dp,
40 int type,
41 char *where,
42 char *name,
43 int namelen,
44 void *a0,
45 void *a1,
46 void *a2,
47 void *a3,
48 void *a4,
49 void *a5,
50 void *a6,
51 void *a7)
52{
53 void *n[5];
54
55 ASSERT(xfs_dir2_trace_buf);
56 ASSERT(dp->i_dir_trace);
57 if (name)
58 memcpy(n, name, min((int)sizeof(n), namelen));
59 else
60 memset((char *)n, 0, sizeof(n));
61 ktrace_enter(xfs_dir2_trace_buf,
62 (void *)(long)type, (void *)where,
63 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
64 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
65 (void *)(long)namelen,
66 (void *)n[0], (void *)n[1], (void *)n[2],
67 (void *)n[3], (void *)n[4]);
68 ktrace_enter(dp->i_dir_trace,
69 (void *)(long)type, (void *)where,
70 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
71 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
72 (void *)(long)namelen,
73 (void *)n[0], (void *)n[1], (void *)n[2],
74 (void *)n[3], (void *)n[4]);
75}
76
77void
78xfs_dir2_trace_args(
79 char *where,
80 xfs_da_args_t *args)
81{
82 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS, where,
83 (char *)args->name, (int)args->namelen,
84 (void *)(unsigned long)args->hashval,
85 (void *)((unsigned long)(args->inumber >> 32)),
86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
87 (void *)args->dp, (void *)args->trans,
88 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
89 NULL, NULL);
90}
91
92void
93xfs_dir2_trace_args_b(
94 char *where,
95 xfs_da_args_t *args,
96 xfs_dabuf_t *bp)
97{
98 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_B, where,
99 (char *)args->name, (int)args->namelen,
100 (void *)(unsigned long)args->hashval,
101 (void *)((unsigned long)(args->inumber >> 32)),
102 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
103 (void *)args->dp, (void *)args->trans,
104 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
105 (void *)(bp ? bp->bps[0] : NULL), NULL);
106}
107
108void
109xfs_dir2_trace_args_bb(
110 char *where,
111 xfs_da_args_t *args,
112 xfs_dabuf_t *lbp,
113 xfs_dabuf_t *dbp)
114{
115 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BB, where,
116 (char *)args->name, (int)args->namelen,
117 (void *)(unsigned long)args->hashval,
118 (void *)((unsigned long)(args->inumber >> 32)),
119 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
120 (void *)args->dp, (void *)args->trans,
121 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
122 (void *)(lbp ? lbp->bps[0] : NULL),
123 (void *)(dbp ? dbp->bps[0] : NULL));
124}
125
126void
127xfs_dir2_trace_args_bibii(
128 char *where,
129 xfs_da_args_t *args,
130 xfs_dabuf_t *bs,
131 int ss,
132 xfs_dabuf_t *bd,
133 int sd,
134 int c)
135{
136 xfs_buf_t *bpbs = bs ? bs->bps[0] : NULL;
137 xfs_buf_t *bpbd = bd ? bd->bps[0] : NULL;
138
139 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BIBII, where,
140 (char *)args->name, (int)args->namelen,
141 (void *)args->dp, (void *)args->trans,
142 (void *)bpbs, (void *)(long)ss, (void *)bpbd, (void *)(long)sd,
143 (void *)(long)c, NULL);
144}
145
146void
147xfs_dir2_trace_args_db(
148 char *where,
149 xfs_da_args_t *args,
150 xfs_dir2_db_t db,
151 xfs_dabuf_t *bp)
152{
153 xfs_buf_t *dbp = bp ? bp->bps[0] : NULL;
154
155 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_DB, where,
156 (char *)args->name, (int)args->namelen,
157 (void *)(unsigned long)args->hashval,
158 (void *)((unsigned long)(args->inumber >> 32)),
159 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
160 (void *)args->dp, (void *)args->trans,
161 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
162 (void *)(long)db, (void *)dbp);
163}
164
165void
166xfs_dir2_trace_args_i(
167 char *where,
168 xfs_da_args_t *args,
169 xfs_ino_t i)
170{
171 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_I, where,
172 (char *)args->name, (int)args->namelen,
173 (void *)(unsigned long)args->hashval,
174 (void *)((unsigned long)(args->inumber >> 32)),
175 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
176 (void *)args->dp, (void *)args->trans,
177 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
178 (void *)((unsigned long)(i >> 32)),
179 (void *)((unsigned long)(i & 0xFFFFFFFF)));
180}
181
182void
183xfs_dir2_trace_args_s(
184 char *where,
185 xfs_da_args_t *args,
186 int s)
187{
188 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_S, where,
189 (char *)args->name, (int)args->namelen,
190 (void *)(unsigned long)args->hashval,
191 (void *)((unsigned long)(args->inumber >> 32)),
192 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
193 (void *)args->dp, (void *)args->trans,
194 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
195 (void *)(long)s, NULL);
196}
197
198void
199xfs_dir2_trace_args_sb(
200 char *where,
201 xfs_da_args_t *args,
202 int s,
203 xfs_dabuf_t *bp)
204{
205 xfs_buf_t *dbp = bp ? bp->bps[0] : NULL;
206
207 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_SB, where,
208 (char *)args->name, (int)args->namelen,
209 (void *)(unsigned long)args->hashval,
210 (void *)((unsigned long)(args->inumber >> 32)),
211 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
212 (void *)args->dp, (void *)args->trans,
213 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
214 (void *)(long)s, (void *)dbp);
215}
216#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dir2_trace.h b/fs/xfs/xfs_dir2_trace.h
deleted file mode 100644
index ca3c754f4822..000000000000
--- a/fs/xfs/xfs_dir2_trace.h
+++ /dev/null
@@ -1,72 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_TRACE_H__
19#define __XFS_DIR2_TRACE_H__
20
21/*
22 * Tracing for xfs v2 directories.
23 */
24
25#if defined(XFS_DIR2_TRACE)
26
27struct ktrace;
28struct xfs_dabuf;
29struct xfs_da_args;
30
31#define XFS_DIR2_GTRACE_SIZE 4096 /* global buffer */
32#define XFS_DIR2_KTRACE_SIZE 32 /* per-inode buffer */
33extern struct ktrace *xfs_dir2_trace_buf;
34
35#define XFS_DIR2_KTRACE_ARGS 1 /* args only */
36#define XFS_DIR2_KTRACE_ARGS_B 2 /* args + buffer */
37#define XFS_DIR2_KTRACE_ARGS_BB 3 /* args + 2 buffers */
38#define XFS_DIR2_KTRACE_ARGS_DB 4 /* args, db, buffer */
39#define XFS_DIR2_KTRACE_ARGS_I 5 /* args, inum */
40#define XFS_DIR2_KTRACE_ARGS_S 6 /* args, int */
41#define XFS_DIR2_KTRACE_ARGS_SB 7 /* args, int, buffer */
42#define XFS_DIR2_KTRACE_ARGS_BIBII 8 /* args, buf/int/buf/int/int */
43
44void xfs_dir2_trace_args(char *where, struct xfs_da_args *args);
45void xfs_dir2_trace_args_b(char *where, struct xfs_da_args *args,
46 struct xfs_dabuf *bp);
47void xfs_dir2_trace_args_bb(char *where, struct xfs_da_args *args,
48 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
49void xfs_dir2_trace_args_bibii(char *where, struct xfs_da_args *args,
50 struct xfs_dabuf *bs, int ss,
51 struct xfs_dabuf *bd, int sd, int c);
52void xfs_dir2_trace_args_db(char *where, struct xfs_da_args *args,
53 xfs_dir2_db_t db, struct xfs_dabuf *bp);
54void xfs_dir2_trace_args_i(char *where, struct xfs_da_args *args, xfs_ino_t i);
55void xfs_dir2_trace_args_s(char *where, struct xfs_da_args *args, int s);
56void xfs_dir2_trace_args_sb(char *where, struct xfs_da_args *args, int s,
57 struct xfs_dabuf *bp);
58
59#else /* XFS_DIR2_TRACE */
60
61#define xfs_dir2_trace_args(where, args)
62#define xfs_dir2_trace_args_b(where, args, bp)
63#define xfs_dir2_trace_args_bb(where, args, lbp, dbp)
64#define xfs_dir2_trace_args_bibii(where, args, bs, ss, bd, sd, c)
65#define xfs_dir2_trace_args_db(where, args, db, bp)
66#define xfs_dir2_trace_args_i(where, args, i)
67#define xfs_dir2_trace_args_s(where, args, s)
68#define xfs_dir2_trace_args_sb(where, args, s, bp)
69
70#endif /* XFS_DIR2_TRACE */
71
72#endif /* __XFS_DIR2_TRACE_H__ */
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index edf8bdf4141f..a631e1451abb 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -34,6 +34,7 @@
34#include "xfs_utils.h" 34#include "xfs_utils.h"
35#include "xfs_mru_cache.h" 35#include "xfs_mru_cache.h"
36#include "xfs_filestream.h" 36#include "xfs_filestream.h"
37#include "xfs_trace.h"
37 38
38#ifdef XFS_FILESTREAMS_TRACE 39#ifdef XFS_FILESTREAMS_TRACE
39 40
@@ -394,9 +395,7 @@ xfs_filestream_init(void)
394 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 395 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
395 if (!item_zone) 396 if (!item_zone)
396 return -ENOMEM; 397 return -ENOMEM;
397#ifdef XFS_FILESTREAMS_TRACE 398
398 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);
399#endif
400 return 0; 399 return 0;
401} 400}
402 401
@@ -407,9 +406,6 @@ xfs_filestream_init(void)
407void 406void
408xfs_filestream_uninit(void) 407xfs_filestream_uninit(void)
409{ 408{
410#ifdef XFS_FILESTREAMS_TRACE
411 ktrace_free(xfs_filestreams_trace_buf);
412#endif
413 kmem_zone_destroy(item_zone); 409 kmem_zone_destroy(item_zone);
414} 410}
415 411
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 36079aa91344..a13919a6a364 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -45,6 +45,7 @@
45#include "xfs_rtalloc.h" 45#include "xfs_rtalloc.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47#include "xfs_filestream.h" 47#include "xfs_filestream.h"
48#include "xfs_trace.h"
48 49
49/* 50/*
50 * File system operations 51 * File system operations
@@ -347,6 +348,7 @@ xfs_growfs_data_private(
347 be32_add_cpu(&agf->agf_length, new); 348 be32_add_cpu(&agf->agf_length, new);
348 ASSERT(be32_to_cpu(agf->agf_length) == 349 ASSERT(be32_to_cpu(agf->agf_length) ==
349 be32_to_cpu(agi->agi_length)); 350 be32_to_cpu(agi->agi_length));
351
350 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 352 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
351 /* 353 /*
352 * Free the new space. 354 * Free the new space.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 073bb4a26b19..155e798f30a1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,7 +43,7 @@
43#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_bmap.h" 44#include "xfs_bmap.h"
45#include "xfs_btree_trace.h" 45#include "xfs_btree_trace.h"
46#include "xfs_dir2_trace.h" 46#include "xfs_trace.h"
47 47
48 48
49/* 49/*
@@ -73,7 +73,6 @@ xfs_inode_alloc(
73 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
77 76
78 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 77 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
79 78
@@ -90,30 +89,8 @@ xfs_inode_alloc(
90 ip->i_size = 0; 89 ip->i_size = 0;
91 ip->i_new_size = 0; 90 ip->i_new_size = 0;
92 91
93 /*
94 * Initialize inode's trace buffers.
95 */
96#ifdef XFS_INODE_TRACE
97 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
98#endif
99#ifdef XFS_BMAP_TRACE
100 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
101#endif
102#ifdef XFS_BTREE_TRACE
103 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
104#endif
105#ifdef XFS_RW_TRACE
106 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
107#endif
108#ifdef XFS_ILOCK_TRACE
109 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
110#endif
111#ifdef XFS_DIR2_TRACE
112 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
113#endif
114
115 /* prevent anyone from using this yet */ 92 /* prevent anyone from using this yet */
116 VFS_I(ip)->i_state = I_NEW|I_LOCK; 93 VFS_I(ip)->i_state = I_NEW;
117 94
118 return ip; 95 return ip;
119} 96}
@@ -133,25 +110,6 @@ xfs_inode_free(
133 if (ip->i_afp) 110 if (ip->i_afp)
134 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 111 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
135 112
136#ifdef XFS_INODE_TRACE
137 ktrace_free(ip->i_trace);
138#endif
139#ifdef XFS_BMAP_TRACE
140 ktrace_free(ip->i_xtrace);
141#endif
142#ifdef XFS_BTREE_TRACE
143 ktrace_free(ip->i_btrace);
144#endif
145#ifdef XFS_RW_TRACE
146 ktrace_free(ip->i_rwtrace);
147#endif
148#ifdef XFS_ILOCK_TRACE
149 ktrace_free(ip->i_lock_trace);
150#endif
151#ifdef XFS_DIR2_TRACE
152 ktrace_free(ip->i_dir_trace);
153#endif
154
155 if (ip->i_itemp) { 113 if (ip->i_itemp) {
156 /* 114 /*
157 * Only if we are shutting down the fs will we see an 115 * Only if we are shutting down the fs will we see an
@@ -210,6 +168,7 @@ xfs_iget_cache_hit(
210 * instead of polling for it. 168 * instead of polling for it.
211 */ 169 */
212 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 170 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
171 trace_xfs_iget_skip(ip);
213 XFS_STATS_INC(xs_ig_frecycle); 172 XFS_STATS_INC(xs_ig_frecycle);
214 error = EAGAIN; 173 error = EAGAIN;
215 goto out_error; 174 goto out_error;
@@ -228,7 +187,7 @@ xfs_iget_cache_hit(
228 * Need to carefully get it back into useable state. 187 * Need to carefully get it back into useable state.
229 */ 188 */
230 if (ip->i_flags & XFS_IRECLAIMABLE) { 189 if (ip->i_flags & XFS_IRECLAIMABLE) {
231 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 190 trace_xfs_iget_reclaim(ip);
232 191
233 /* 192 /*
234 * We need to set XFS_INEW atomically with clearing the 193 * We need to set XFS_INEW atomically with clearing the
@@ -254,9 +213,10 @@ xfs_iget_cache_hit(
254 ip->i_flags &= ~XFS_INEW; 213 ip->i_flags &= ~XFS_INEW;
255 ip->i_flags |= XFS_IRECLAIMABLE; 214 ip->i_flags |= XFS_IRECLAIMABLE;
256 __xfs_inode_set_reclaim_tag(pag, ip); 215 __xfs_inode_set_reclaim_tag(pag, ip);
216 trace_xfs_iget_reclaim(ip);
257 goto out_error; 217 goto out_error;
258 } 218 }
259 inode->i_state = I_LOCK|I_NEW; 219 inode->i_state = I_NEW;
260 } else { 220 } else {
261 /* If the VFS inode is being torn down, pause and try again. */ 221 /* If the VFS inode is being torn down, pause and try again. */
262 if (!igrab(inode)) { 222 if (!igrab(inode)) {
@@ -273,8 +233,9 @@ xfs_iget_cache_hit(
273 xfs_ilock(ip, lock_flags); 233 xfs_ilock(ip, lock_flags);
274 234
275 xfs_iflags_clear(ip, XFS_ISTALE); 235 xfs_iflags_clear(ip, XFS_ISTALE);
276 xfs_itrace_exit_tag(ip, "xfs_iget.found");
277 XFS_STATS_INC(xs_ig_found); 236 XFS_STATS_INC(xs_ig_found);
237
238 trace_xfs_iget_found(ip);
278 return 0; 239 return 0;
279 240
280out_error: 241out_error:
@@ -308,7 +269,7 @@ xfs_iget_cache_miss(
308 if (error) 269 if (error)
309 goto out_destroy; 270 goto out_destroy;
310 271
311 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 272 xfs_itrace_entry(ip);
312 273
313 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 274 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
314 error = ENOENT; 275 error = ENOENT;
@@ -353,6 +314,8 @@ xfs_iget_cache_miss(
353 314
354 write_unlock(&pag->pag_ici_lock); 315 write_unlock(&pag->pag_ici_lock);
355 radix_tree_preload_end(); 316 radix_tree_preload_end();
317
318 trace_xfs_iget_alloc(ip);
356 *ipp = ip; 319 *ipp = ip;
357 return 0; 320 return 0;
358 321
@@ -514,17 +477,21 @@ xfs_ireclaim(
514{ 477{
515 struct xfs_mount *mp = ip->i_mount; 478 struct xfs_mount *mp = ip->i_mount;
516 struct xfs_perag *pag; 479 struct xfs_perag *pag;
480 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
517 481
518 XFS_STATS_INC(xs_ig_reclaims); 482 XFS_STATS_INC(xs_ig_reclaims);
519 483
520 /* 484 /*
521 * Remove the inode from the per-AG radix tree. It doesn't matter 485 * Remove the inode from the per-AG radix tree.
522 * if it was never added to it because radix_tree_delete can deal 486 *
523 * with that case just fine. 487 * Because radix_tree_delete won't complain even if the item was never
488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on.
524 */ 490 */
525 pag = xfs_get_perag(mp, ip->i_ino); 491 pag = xfs_get_perag(mp, ip->i_ino);
526 write_lock(&pag->pag_ici_lock); 492 write_lock(&pag->pag_ici_lock);
527 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0);
528 write_unlock(&pag->pag_ici_lock); 495 write_unlock(&pag->pag_ici_lock);
529 xfs_put_perag(mp, pag); 496 xfs_put_perag(mp, pag);
530 497
@@ -639,7 +606,7 @@ xfs_ilock(
639 else if (lock_flags & XFS_ILOCK_SHARED) 606 else if (lock_flags & XFS_ILOCK_SHARED)
640 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 607 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
641 608
642 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 609 trace_xfs_ilock(ip, lock_flags, _RET_IP_);
643} 610}
644 611
645/* 612/*
@@ -684,7 +651,7 @@ xfs_ilock_nowait(
684 if (!mrtryaccess(&ip->i_lock)) 651 if (!mrtryaccess(&ip->i_lock))
685 goto out_undo_iolock; 652 goto out_undo_iolock;
686 } 653 }
687 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 654 trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
688 return 1; 655 return 1;
689 656
690 out_undo_iolock: 657 out_undo_iolock:
@@ -746,7 +713,7 @@ xfs_iunlock(
746 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, 713 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
747 (xfs_log_item_t*)(ip->i_itemp)); 714 (xfs_log_item_t*)(ip->i_itemp));
748 } 715 }
749 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 716 trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
750} 717}
751 718
752/* 719/*
@@ -765,6 +732,8 @@ xfs_ilock_demote(
765 mrdemote(&ip->i_lock); 732 mrdemote(&ip->i_lock);
766 if (lock_flags & XFS_IOLOCK_EXCL) 733 if (lock_flags & XFS_IOLOCK_EXCL)
767 mrdemote(&ip->i_iolock); 734 mrdemote(&ip->i_iolock);
735
736 trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
768} 737}
769 738
770#ifdef DEBUG 739#ifdef DEBUG
@@ -795,52 +764,3 @@ xfs_isilocked(
795 return 1; 764 return 1;
796} 765}
797#endif 766#endif
798
799#ifdef XFS_INODE_TRACE
800
801#define KTRACE_ENTER(ip, vk, s, line, ra) \
802 ktrace_enter((ip)->i_trace, \
803/* 0 */ (void *)(__psint_t)(vk), \
804/* 1 */ (void *)(s), \
805/* 2 */ (void *)(__psint_t) line, \
806/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
807/* 4 */ (void *)(ra), \
808/* 5 */ NULL, \
809/* 6 */ (void *)(__psint_t)current_cpu(), \
810/* 7 */ (void *)(__psint_t)current_pid(), \
811/* 8 */ (void *)__return_address, \
812/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
813
814/*
815 * Vnode tracing code.
816 */
817void
818_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
819{
820 KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
821}
822
823void
824_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
825{
826 KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
827}
828
829void
830xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
831{
832 KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
833}
834
835void
836_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
837{
838 KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
839}
840
841void
842xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
843{
844 KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
845}
846#endif /* XFS_INODE_TRACE */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b92a4fa2a0a1..ef77fd88c8e3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -47,10 +47,10 @@
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_error.h" 48#include "xfs_error.h"
49#include "xfs_utils.h" 49#include "xfs_utils.h"
50#include "xfs_dir2_trace.h"
51#include "xfs_quota.h" 50#include "xfs_quota.h"
52#include "xfs_filestream.h" 51#include "xfs_filestream.h"
53#include "xfs_vnodeops.h" 52#include "xfs_vnodeops.h"
53#include "xfs_trace.h"
54 54
55kmem_zone_t *xfs_ifork_zone; 55kmem_zone_t *xfs_ifork_zone;
56kmem_zone_t *xfs_inode_zone; 56kmem_zone_t *xfs_inode_zone;
@@ -1291,42 +1291,6 @@ xfs_file_last_byte(
1291 return last_byte; 1291 return last_byte;
1292} 1292}
1293 1293
1294#if defined(XFS_RW_TRACE)
1295STATIC void
1296xfs_itrunc_trace(
1297 int tag,
1298 xfs_inode_t *ip,
1299 int flag,
1300 xfs_fsize_t new_size,
1301 xfs_off_t toss_start,
1302 xfs_off_t toss_finish)
1303{
1304 if (ip->i_rwtrace == NULL) {
1305 return;
1306 }
1307
1308 ktrace_enter(ip->i_rwtrace,
1309 (void*)((long)tag),
1310 (void*)ip,
1311 (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff),
1312 (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff),
1313 (void*)((long)flag),
1314 (void*)(unsigned long)((new_size >> 32) & 0xffffffff),
1315 (void*)(unsigned long)(new_size & 0xffffffff),
1316 (void*)(unsigned long)((toss_start >> 32) & 0xffffffff),
1317 (void*)(unsigned long)(toss_start & 0xffffffff),
1318 (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff),
1319 (void*)(unsigned long)(toss_finish & 0xffffffff),
1320 (void*)(unsigned long)current_cpu(),
1321 (void*)(unsigned long)current_pid(),
1322 (void*)NULL,
1323 (void*)NULL,
1324 (void*)NULL);
1325}
1326#else
1327#define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish)
1328#endif
1329
1330/* 1294/*
1331 * Start the truncation of the file to new_size. The new size 1295 * Start the truncation of the file to new_size. The new size
1332 * must be smaller than the current size. This routine will 1296 * must be smaller than the current size. This routine will
@@ -1409,8 +1373,7 @@ xfs_itruncate_start(
1409 return 0; 1373 return 0;
1410 } 1374 }
1411 last_byte = xfs_file_last_byte(ip); 1375 last_byte = xfs_file_last_byte(ip);
1412 xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, 1376 trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
1413 last_byte);
1414 if (last_byte > toss_start) { 1377 if (last_byte > toss_start) {
1415 if (flags & XFS_ITRUNC_DEFINITE) { 1378 if (flags & XFS_ITRUNC_DEFINITE) {
1416 xfs_tosspages(ip, toss_start, 1379 xfs_tosspages(ip, toss_start,
@@ -1514,7 +1477,8 @@ xfs_itruncate_finish(
1514 new_size = 0LL; 1477 new_size = 0LL;
1515 } 1478 }
1516 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1479 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1517 xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 1480 trace_xfs_itruncate_finish_start(ip, new_size);
1481
1518 /* 1482 /*
1519 * The first thing we do is set the size to new_size permanently 1483 * The first thing we do is set the size to new_size permanently
1520 * on disk. This way we don't have to worry about anyone ever 1484 * on disk. This way we don't have to worry about anyone ever
@@ -1731,7 +1695,7 @@ xfs_itruncate_finish(
1731 ASSERT((new_size != 0) || 1695 ASSERT((new_size != 0) ||
1732 (fork == XFS_ATTR_FORK) || 1696 (fork == XFS_ATTR_FORK) ||
1733 (ip->i_d.di_nextents == 0)); 1697 (ip->i_d.di_nextents == 0));
1734 xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 1698 trace_xfs_itruncate_finish_end(ip, new_size);
1735 return 0; 1699 return 0;
1736} 1700}
1737 1701
@@ -2877,8 +2841,8 @@ xfs_iflush(
2877 mp = ip->i_mount; 2841 mp = ip->i_mount;
2878 2842
2879 /* 2843 /*
2880 * If the inode isn't dirty, then just release the inode 2844 * If the inode isn't dirty, then just release the inode flush lock and
2881 * flush lock and do nothing. 2845 * do nothing.
2882 */ 2846 */
2883 if (xfs_inode_clean(ip)) { 2847 if (xfs_inode_clean(ip)) {
2884 xfs_ifunlock(ip); 2848 xfs_ifunlock(ip);
@@ -2904,6 +2868,19 @@ xfs_iflush(
2904 xfs_iunpin_wait(ip); 2868 xfs_iunpin_wait(ip);
2905 2869
2906 /* 2870 /*
2871 * For stale inodes we cannot rely on the backing buffer remaining
2872 * stale in cache for the remaining life of the stale inode and so
2873 * xfs_itobp() below may give us a buffer that no longer contains
2874 * inodes below. We have to check this after ensuring the inode is
2875 * unpinned so that it is safe to reclaim the stale inode after the
2876 * flush call.
2877 */
2878 if (xfs_iflags_test(ip, XFS_ISTALE)) {
2879 xfs_ifunlock(ip);
2880 return 0;
2881 }
2882
2883 /*
2907 * This may have been unpinned because the filesystem is shutting 2884 * This may have been unpinned because the filesystem is shutting
2908 * down forcibly. If that's the case we must not write this inode 2885 * down forcibly. If that's the case we must not write this inode
2909 * to disk, because the log record didn't make it to disk! 2886 * to disk, because the log record didn't make it to disk!
@@ -3252,23 +3229,6 @@ corrupt_out:
3252 return XFS_ERROR(EFSCORRUPTED); 3229 return XFS_ERROR(EFSCORRUPTED);
3253} 3230}
3254 3231
3255
3256
3257#ifdef XFS_ILOCK_TRACE
3258void
3259xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
3260{
3261 ktrace_enter(ip->i_lock_trace,
3262 (void *)ip,
3263 (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */
3264 (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */
3265 (void *)ra, /* caller of ilock */
3266 (void *)(unsigned long)current_cpu(),
3267 (void *)(unsigned long)current_pid(),
3268 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
3269}
3270#endif
3271
3272/* 3232/*
3273 * Return a pointer to the extent record at file index idx. 3233 * Return a pointer to the extent record at file index idx.
3274 */ 3234 */
@@ -3300,13 +3260,17 @@ xfs_iext_get_ext(
3300 */ 3260 */
3301void 3261void
3302xfs_iext_insert( 3262xfs_iext_insert(
3303 xfs_ifork_t *ifp, /* inode fork pointer */ 3263 xfs_inode_t *ip, /* incore inode pointer */
3304 xfs_extnum_t idx, /* starting index of new items */ 3264 xfs_extnum_t idx, /* starting index of new items */
3305 xfs_extnum_t count, /* number of inserted items */ 3265 xfs_extnum_t count, /* number of inserted items */
3306 xfs_bmbt_irec_t *new) /* items to insert */ 3266 xfs_bmbt_irec_t *new, /* items to insert */
3267 int state) /* type of extent conversion */
3307{ 3268{
3269 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
3308 xfs_extnum_t i; /* extent record index */ 3270 xfs_extnum_t i; /* extent record index */
3309 3271
3272 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
3273
3310 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3274 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3311 xfs_iext_add(ifp, idx, count); 3275 xfs_iext_add(ifp, idx, count);
3312 for (i = idx; i < idx + count; i++, new++) 3276 for (i = idx; i < idx + count; i++, new++)
@@ -3549,13 +3513,17 @@ xfs_iext_add_indirect_multi(
3549 */ 3513 */
3550void 3514void
3551xfs_iext_remove( 3515xfs_iext_remove(
3552 xfs_ifork_t *ifp, /* inode fork pointer */ 3516 xfs_inode_t *ip, /* incore inode pointer */
3553 xfs_extnum_t idx, /* index to begin removing exts */ 3517 xfs_extnum_t idx, /* index to begin removing exts */
3554 int ext_diff) /* number of extents to remove */ 3518 int ext_diff, /* number of extents to remove */
3519 int state) /* type of extent conversion */
3555{ 3520{
3521 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
3556 xfs_extnum_t nextents; /* number of extents in file */ 3522 xfs_extnum_t nextents; /* number of extents in file */
3557 int new_size; /* size of extents after removal */ 3523 int new_size; /* size of extents after removal */
3558 3524
3525 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
3526
3559 ASSERT(ext_diff > 0); 3527 ASSERT(ext_diff > 0);
3560 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3528 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3561 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 3529 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 41555de1d1db..ec1f28c4fc4f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -213,7 +213,6 @@ typedef struct xfs_icdinode {
213 213
214struct bhv_desc; 214struct bhv_desc;
215struct cred; 215struct cred;
216struct ktrace;
217struct xfs_buf; 216struct xfs_buf;
218struct xfs_bmap_free; 217struct xfs_bmap_free;
219struct xfs_bmbt_irec; 218struct xfs_bmbt_irec;
@@ -222,13 +221,6 @@ struct xfs_mount;
222struct xfs_trans; 221struct xfs_trans;
223struct xfs_dquot; 222struct xfs_dquot;
224 223
225#if defined(XFS_ILOCK_TRACE)
226#define XFS_ILOCK_KTRACE_SIZE 32
227extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
228#else
229#define xfs_ilock_trace(i,n,f,ra)
230#endif
231
232typedef struct dm_attrs_s { 224typedef struct dm_attrs_s {
233 __uint32_t da_dmevmask; /* DMIG event mask */ 225 __uint32_t da_dmevmask; /* DMIG event mask */
234 __uint16_t da_dmstate; /* DMIG state info */ 226 __uint16_t da_dmstate; /* DMIG state info */
@@ -271,26 +263,6 @@ typedef struct xfs_inode {
271 263
272 /* VFS inode */ 264 /* VFS inode */
273 struct inode i_vnode; /* embedded VFS inode */ 265 struct inode i_vnode; /* embedded VFS inode */
274
275 /* Trace buffers per inode. */
276#ifdef XFS_INODE_TRACE
277 struct ktrace *i_trace; /* general inode trace */
278#endif
279#ifdef XFS_BMAP_TRACE
280 struct ktrace *i_xtrace; /* inode extent list trace */
281#endif
282#ifdef XFS_BTREE_TRACE
283 struct ktrace *i_btrace; /* inode bmap btree trace */
284#endif
285#ifdef XFS_RW_TRACE
286 struct ktrace *i_rwtrace; /* inode read/write trace */
287#endif
288#ifdef XFS_ILOCK_TRACE
289 struct ktrace *i_lock_trace; /* inode lock/unlock trace */
290#endif
291#ifdef XFS_DIR2_TRACE
292 struct ktrace *i_dir_trace; /* inode directory trace */
293#endif
294} xfs_inode_t; 266} xfs_inode_t;
295 267
296#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ 268#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
@@ -406,6 +378,14 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
406#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 378#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
407 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 379 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
408 380
381#define XFS_LOCK_FLAGS \
382 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
383 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
384 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
385 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
386 { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
387
388
409/* 389/*
410 * Flags for lockdep annotations. 390 * Flags for lockdep annotations.
411 * 391 *
@@ -455,6 +435,10 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
455#define XFS_ITRUNC_DEFINITE 0x1 435#define XFS_ITRUNC_DEFINITE 0x1
456#define XFS_ITRUNC_MAYBE 0x2 436#define XFS_ITRUNC_MAYBE 0x2
457 437
438#define XFS_ITRUNC_FLAGS \
439 { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \
440 { XFS_ITRUNC_MAYBE, "MAYBE" }
441
458/* 442/*
459 * For multiple groups support: if S_ISGID bit is set in the parent 443 * For multiple groups support: if S_ISGID bit is set in the parent
460 * directory, group of new file is set to that of the parent, and 444 * directory, group of new file is set to that of the parent, and
@@ -507,48 +491,16 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
507void xfs_synchronize_times(xfs_inode_t *); 491void xfs_synchronize_times(xfs_inode_t *);
508void xfs_mark_inode_dirty_sync(xfs_inode_t *); 492void xfs_mark_inode_dirty_sync(xfs_inode_t *);
509 493
510#if defined(XFS_INODE_TRACE)
511
512#define INODE_TRACE_SIZE 16 /* number of trace entries */
513#define INODE_KTRACE_ENTRY 1
514#define INODE_KTRACE_EXIT 2
515#define INODE_KTRACE_HOLD 3
516#define INODE_KTRACE_REF 4
517#define INODE_KTRACE_RELE 5
518
519extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
520extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
521extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
522extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
523extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
524#define xfs_itrace_entry(ip) \
525 _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
526#define xfs_itrace_exit(ip) \
527 _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
528#define xfs_itrace_exit_tag(ip, tag) \
529 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
530#define xfs_itrace_ref(ip) \
531 _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
532
533#else
534#define xfs_itrace_entry(a)
535#define xfs_itrace_exit(a)
536#define xfs_itrace_exit_tag(a, b)
537#define xfs_itrace_hold(a, b, c, d)
538#define xfs_itrace_ref(a)
539#define xfs_itrace_rele(a, b, c, d)
540#endif
541
542#define IHOLD(ip) \ 494#define IHOLD(ip) \
543do { \ 495do { \
544 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 496 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
545 atomic_inc(&(VFS_I(ip)->i_count)); \ 497 atomic_inc(&(VFS_I(ip)->i_count)); \
546 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ 498 trace_xfs_ihold(ip, _THIS_IP_); \
547} while (0) 499} while (0)
548 500
549#define IRELE(ip) \ 501#define IRELE(ip) \
550do { \ 502do { \
551 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ 503 trace_xfs_irele(ip, _THIS_IP_); \
552 iput(VFS_I(ip)); \ 504 iput(VFS_I(ip)); \
553} while (0) 505} while (0)
554 506
@@ -577,11 +529,11 @@ int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
577int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); 529int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
578 530
579xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 531xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
580void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 532void xfs_iext_insert(xfs_inode_t *, xfs_extnum_t, xfs_extnum_t,
581 xfs_bmbt_irec_t *); 533 xfs_bmbt_irec_t *, int);
582void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); 534void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
583void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); 535void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
584void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); 536void xfs_iext_remove(xfs_inode_t *, xfs_extnum_t, int, int);
585void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); 537void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
586void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); 538void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
587void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); 539void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 9794b876d6ff..f38855d21ea5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -41,6 +41,7 @@
41#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_trace.h"
44 45
45 46
46kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 47kmem_zone_t *xfs_ili_zone; /* inode log item zone */
@@ -800,7 +801,9 @@ xfs_inode_item_pushbuf(
800 !completion_done(&ip->i_flush)); 801 !completion_done(&ip->i_flush));
801 iip->ili_pushbuf_flag = 0; 802 iip->ili_pushbuf_flag = 0;
802 xfs_iunlock(ip, XFS_ILOCK_SHARED); 803 xfs_iunlock(ip, XFS_ILOCK_SHARED);
803 xfs_buftrace("INODE ITEM PUSH", bp); 804
805 trace_xfs_inode_item_push(bp, _RET_IP_);
806
804 if (XFS_BUF_ISPINNED(bp)) { 807 if (XFS_BUF_ISPINNED(bp)) {
805 xfs_log_force(mp, (xfs_lsn_t)0, 808 xfs_log_force(mp, (xfs_lsn_t)0,
806 XFS_LOG_FORCE); 809 XFS_LOG_FORCE);
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 65bae4c9b8bf..cc8df1ac7783 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w)
127#ifdef __KERNEL__ 127#ifdef __KERNEL__
128 128
129struct xfs_buf; 129struct xfs_buf;
130struct xfs_bmbt_rec_64; 130struct xfs_bmbt_rec;
131struct xfs_inode; 131struct xfs_inode;
132struct xfs_mount; 132struct xfs_mount;
133 133
@@ -140,9 +140,9 @@ typedef struct xfs_inode_log_item {
140 unsigned short ili_flags; /* misc flags */ 140 unsigned short ili_flags; /* misc flags */
141 unsigned short ili_logged; /* flushed logged data */ 141 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 142 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged 143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
144 data exts */ 144 data exts */
145 struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged 145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
146 attr exts */ 146 attr exts */
147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ 147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
148 148
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7294abce6ef2..0b65039951a0 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -47,72 +47,8 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_iomap.h" 49#include "xfs_iomap.h"
50#include "xfs_trace.h"
50 51
51#if defined(XFS_RW_TRACE)
52void
53xfs_iomap_enter_trace(
54 int tag,
55 xfs_inode_t *ip,
56 xfs_off_t offset,
57 ssize_t count)
58{
59 if (!ip->i_rwtrace)
60 return;
61
62 ktrace_enter(ip->i_rwtrace,
63 (void *)((unsigned long)tag),
64 (void *)ip,
65 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
66 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
67 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
68 (void *)((unsigned long)(offset & 0xffffffff)),
69 (void *)((unsigned long)count),
70 (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
71 (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
72 (void *)((unsigned long)current_pid()),
73 (void *)NULL,
74 (void *)NULL,
75 (void *)NULL,
76 (void *)NULL,
77 (void *)NULL,
78 (void *)NULL);
79}
80
81void
82xfs_iomap_map_trace(
83 int tag,
84 xfs_inode_t *ip,
85 xfs_off_t offset,
86 ssize_t count,
87 xfs_iomap_t *iomapp,
88 xfs_bmbt_irec_t *imapp,
89 int flags)
90{
91 if (!ip->i_rwtrace)
92 return;
93
94 ktrace_enter(ip->i_rwtrace,
95 (void *)((unsigned long)tag),
96 (void *)ip,
97 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
98 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
99 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
100 (void *)((unsigned long)(offset & 0xffffffff)),
101 (void *)((unsigned long)count),
102 (void *)((unsigned long)flags),
103 (void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)),
104 (void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)),
105 (void *)((unsigned long)(iomapp->iomap_delta)),
106 (void *)((unsigned long)(iomapp->iomap_bsize)),
107 (void *)((unsigned long)(iomapp->iomap_bn)),
108 (void *)(__psint_t)(imapp->br_startoff),
109 (void *)((unsigned long)(imapp->br_blockcount)),
110 (void *)(__psint_t)(imapp->br_startblock));
111}
112#else
113#define xfs_iomap_enter_trace(tag, io, offset, count)
114#define xfs_iomap_map_trace(tag, io, offset, count, iomapp, imapp, flags)
115#endif
116 52
117#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ 53#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
118 << mp->m_writeio_log) 54 << mp->m_writeio_log)
@@ -187,21 +123,20 @@ xfs_iomap(
187 if (XFS_FORCED_SHUTDOWN(mp)) 123 if (XFS_FORCED_SHUTDOWN(mp))
188 return XFS_ERROR(EIO); 124 return XFS_ERROR(EIO);
189 125
126 trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
127
190 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { 128 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
191 case BMAPI_READ: 129 case BMAPI_READ:
192 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count);
193 lockmode = xfs_ilock_map_shared(ip); 130 lockmode = xfs_ilock_map_shared(ip);
194 bmapi_flags = XFS_BMAPI_ENTIRE; 131 bmapi_flags = XFS_BMAPI_ENTIRE;
195 break; 132 break;
196 case BMAPI_WRITE: 133 case BMAPI_WRITE:
197 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
198 lockmode = XFS_ILOCK_EXCL; 134 lockmode = XFS_ILOCK_EXCL;
199 if (flags & BMAPI_IGNSTATE) 135 if (flags & BMAPI_IGNSTATE)
200 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; 136 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
201 xfs_ilock(ip, lockmode); 137 xfs_ilock(ip, lockmode);
202 break; 138 break;
203 case BMAPI_ALLOCATE: 139 case BMAPI_ALLOCATE:
204 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
205 lockmode = XFS_ILOCK_SHARED; 140 lockmode = XFS_ILOCK_SHARED;
206 bmapi_flags = XFS_BMAPI_ENTIRE; 141 bmapi_flags = XFS_BMAPI_ENTIRE;
207 142
@@ -237,8 +172,7 @@ xfs_iomap(
237 if (nimaps && 172 if (nimaps &&
238 (imap.br_startblock != HOLESTARTBLOCK) && 173 (imap.br_startblock != HOLESTARTBLOCK) &&
239 (imap.br_startblock != DELAYSTARTBLOCK)) { 174 (imap.br_startblock != DELAYSTARTBLOCK)) {
240 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, 175 trace_xfs_iomap_found(ip, offset, count, flags, &imap);
241 offset, count, iomapp, &imap, flags);
242 break; 176 break;
243 } 177 }
244 178
@@ -250,8 +184,7 @@ xfs_iomap(
250 &imap, &nimaps); 184 &imap, &nimaps);
251 } 185 }
252 if (!error) { 186 if (!error) {
253 xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip, 187 trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
254 offset, count, iomapp, &imap, flags);
255 } 188 }
256 iomap_flags = IOMAP_NEW; 189 iomap_flags = IOMAP_NEW;
257 break; 190 break;
@@ -261,8 +194,7 @@ xfs_iomap(
261 lockmode = 0; 194 lockmode = 0;
262 195
263 if (nimaps && !isnullstartblock(imap.br_startblock)) { 196 if (nimaps && !isnullstartblock(imap.br_startblock)) {
264 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, 197 trace_xfs_iomap_found(ip, offset, count, flags, &imap);
265 offset, count, iomapp, &imap, flags);
266 break; 198 break;
267 } 199 }
268 200
@@ -623,8 +555,7 @@ retry:
623 * delalloc blocks and retry without EOF preallocation. 555 * delalloc blocks and retry without EOF preallocation.
624 */ 556 */
625 if (nimaps == 0) { 557 if (nimaps == 0) {
626 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, 558 trace_xfs_delalloc_enospc(ip, offset, count);
627 ip, offset, count);
628 if (flushed) 559 if (flushed)
629 return XFS_ERROR(ENOSPC); 560 return XFS_ERROR(ENOSPC);
630 561
@@ -837,7 +768,7 @@ xfs_iomap_write_unwritten(
837 int committed; 768 int committed;
838 int error; 769 int error;
839 770
840 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); 771 trace_xfs_unwritten_convert(ip, offset, count);
841 772
842 offset_fsb = XFS_B_TO_FSBT(mp, offset); 773 offset_fsb = XFS_B_TO_FSBT(mp, offset);
843 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 774 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index fdcf7b82747f..174f29990991 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -43,6 +43,14 @@ typedef enum {
43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ 43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */
44} bmapi_flags_t; 44} bmapi_flags_t;
45 45
46#define BMAPI_FLAGS \
47 { BMAPI_READ, "READ" }, \
48 { BMAPI_WRITE, "WRITE" }, \
49 { BMAPI_ALLOCATE, "ALLOCATE" }, \
50 { BMAPI_IGNSTATE, "IGNSTATE" }, \
51 { BMAPI_DIRECT, "DIRECT" }, \
52 { BMAPI_MMAP, "MMAP" }, \
53 { BMAPI_TRYLOCK, "TRYLOCK" }
46 54
47/* 55/*
48 * xfs_iomap_t: File system I/O map 56 * xfs_iomap_t: File system I/O map
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9dbdff3ea484..600b5b06aaeb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -40,6 +40,7 @@
40#include "xfs_dinode.h" 40#include "xfs_dinode.h"
41#include "xfs_inode.h" 41#include "xfs_inode.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_trace.h"
43 44
44kmem_zone_t *xfs_log_ticket_zone; 45kmem_zone_t *xfs_log_ticket_zone;
45 46
@@ -122,85 +123,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
122 123
123STATIC int xlog_iclogs_empty(xlog_t *log); 124STATIC int xlog_iclogs_empty(xlog_t *log);
124 125
125#if defined(XFS_LOG_TRACE)
126
127#define XLOG_TRACE_LOGGRANT_SIZE 2048
128#define XLOG_TRACE_ICLOG_SIZE 256
129
130void
131xlog_trace_loggrant_alloc(xlog_t *log)
132{
133 log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
134}
135
136void
137xlog_trace_loggrant_dealloc(xlog_t *log)
138{
139 ktrace_free(log->l_grant_trace);
140}
141
142void
143xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
144{
145 unsigned long cnts;
146
147 /* ticket counts are 1 byte each */
148 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
149
150 ktrace_enter(log->l_grant_trace,
151 (void *)tic,
152 (void *)log->l_reserve_headq,
153 (void *)log->l_write_headq,
154 (void *)((unsigned long)log->l_grant_reserve_cycle),
155 (void *)((unsigned long)log->l_grant_reserve_bytes),
156 (void *)((unsigned long)log->l_grant_write_cycle),
157 (void *)((unsigned long)log->l_grant_write_bytes),
158 (void *)((unsigned long)log->l_curr_cycle),
159 (void *)((unsigned long)log->l_curr_block),
160 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
161 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
162 (void *)string,
163 (void *)((unsigned long)tic->t_trans_type),
164 (void *)cnts,
165 (void *)((unsigned long)tic->t_curr_res),
166 (void *)((unsigned long)tic->t_unit_res));
167}
168
169void
170xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
171{
172 iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
173}
174
175void
176xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
177{
178 ktrace_free(iclog->ic_trace);
179}
180
181void
182xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
183{
184 ktrace_enter(iclog->ic_trace,
185 (void *)((unsigned long)state),
186 (void *)((unsigned long)current_pid()),
187 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
188 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
189 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
190 (void *)NULL, (void *)NULL);
191}
192#else
193
194#define xlog_trace_loggrant_alloc(log)
195#define xlog_trace_loggrant_dealloc(log)
196#define xlog_trace_loggrant(log,tic,string)
197
198#define xlog_trace_iclog_alloc(iclog)
199#define xlog_trace_iclog_dealloc(iclog)
200#define xlog_trace_iclog(iclog,state)
201
202#endif /* XFS_LOG_TRACE */
203
204 126
205static void 127static void
206xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) 128xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
@@ -353,15 +275,17 @@ xfs_log_done(xfs_mount_t *mp,
353 275
354 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || 276 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
355 (flags & XFS_LOG_REL_PERM_RESERV)) { 277 (flags & XFS_LOG_REL_PERM_RESERV)) {
278 trace_xfs_log_done_nonperm(log, ticket);
279
356 /* 280 /*
357 * Release ticket if not permanent reservation or a specific 281 * Release ticket if not permanent reservation or a specific
358 * request has been made to release a permanent reservation. 282 * request has been made to release a permanent reservation.
359 */ 283 */
360 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
361 xlog_ungrant_log_space(log, ticket); 284 xlog_ungrant_log_space(log, ticket);
362 xfs_log_ticket_put(ticket); 285 xfs_log_ticket_put(ticket);
363 } else { 286 } else {
364 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 287 trace_xfs_log_done_perm(log, ticket);
288
365 xlog_regrant_reserve_log_space(log, ticket); 289 xlog_regrant_reserve_log_space(log, ticket);
366 /* If this ticket was a permanent reservation and we aren't 290 /* If this ticket was a permanent reservation and we aren't
367 * trying to release it, reset the inited flags; so next time 291 * trying to release it, reset the inited flags; so next time
@@ -505,10 +429,13 @@ xfs_log_reserve(xfs_mount_t *mp,
505 429
506 XFS_STATS_INC(xs_try_logspace); 430 XFS_STATS_INC(xs_try_logspace);
507 431
432
508 if (*ticket != NULL) { 433 if (*ticket != NULL) {
509 ASSERT(flags & XFS_LOG_PERM_RESERV); 434 ASSERT(flags & XFS_LOG_PERM_RESERV);
510 internal_ticket = (xlog_ticket_t *)*ticket; 435 internal_ticket = (xlog_ticket_t *)*ticket;
511 xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); 436
437 trace_xfs_log_reserve(log, internal_ticket);
438
512 xlog_grant_push_ail(mp, internal_ticket->t_unit_res); 439 xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
513 retval = xlog_regrant_write_log_space(log, internal_ticket); 440 retval = xlog_regrant_write_log_space(log, internal_ticket);
514 } else { 441 } else {
@@ -519,10 +446,9 @@ xfs_log_reserve(xfs_mount_t *mp,
519 return XFS_ERROR(ENOMEM); 446 return XFS_ERROR(ENOMEM);
520 internal_ticket->t_trans_type = t_type; 447 internal_ticket->t_trans_type = t_type;
521 *ticket = internal_ticket; 448 *ticket = internal_ticket;
522 xlog_trace_loggrant(log, internal_ticket, 449
523 (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? 450 trace_xfs_log_reserve(log, internal_ticket);
524 "xfs_log_reserve: create new ticket (permanent trans)" : 451
525 "xfs_log_reserve: create new ticket");
526 xlog_grant_push_ail(mp, 452 xlog_grant_push_ail(mp,
527 (internal_ticket->t_unit_res * 453 (internal_ticket->t_unit_res *
528 internal_ticket->t_cnt)); 454 internal_ticket->t_cnt));
@@ -734,7 +660,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
734 spin_unlock(&log->l_icloglock); 660 spin_unlock(&log->l_icloglock);
735 } 661 }
736 if (tic) { 662 if (tic) {
737 xlog_trace_loggrant(log, tic, "unmount rec"); 663 trace_xfs_log_umount_write(log, tic);
738 xlog_ungrant_log_space(log, tic); 664 xlog_ungrant_log_space(log, tic);
739 xfs_log_ticket_put(tic); 665 xfs_log_ticket_put(tic);
740 } 666 }
@@ -1030,7 +956,6 @@ xlog_iodone(xfs_buf_t *bp)
1030 xfs_fs_cmn_err(CE_WARN, l->l_mp, 956 xfs_fs_cmn_err(CE_WARN, l->l_mp,
1031 "xlog_iodone: Barriers are no longer supported" 957 "xlog_iodone: Barriers are no longer supported"
1032 " by device. Disabling barriers\n"); 958 " by device. Disabling barriers\n");
1033 xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp);
1034 } 959 }
1035 960
1036 /* 961 /*
@@ -1085,13 +1010,10 @@ xlog_bdstrat_cb(struct xfs_buf *bp)
1085 return 0; 1010 return 0;
1086 } 1011 }
1087 1012
1088 xfs_buftrace("XLOG__BDSTRAT IOERROR", bp);
1089 XFS_BUF_ERROR(bp, EIO); 1013 XFS_BUF_ERROR(bp, EIO);
1090 XFS_BUF_STALE(bp); 1014 XFS_BUF_STALE(bp);
1091 xfs_biodone(bp); 1015 xfs_biodone(bp);
1092 return XFS_ERROR(EIO); 1016 return XFS_ERROR(EIO);
1093
1094
1095} 1017}
1096 1018
1097/* 1019/*
@@ -1246,7 +1168,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1246 spin_lock_init(&log->l_grant_lock); 1168 spin_lock_init(&log->l_grant_lock);
1247 sv_init(&log->l_flush_wait, 0, "flush_wait"); 1169 sv_init(&log->l_flush_wait, 0, "flush_wait");
1248 1170
1249 xlog_trace_loggrant_alloc(log);
1250 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1171 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1251 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1172 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
1252 1173
@@ -1305,8 +1226,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1305 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1226 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1306 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1227 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1307 1228
1308 xlog_trace_iclog_alloc(iclog);
1309
1310 iclogp = &iclog->ic_next; 1229 iclogp = &iclog->ic_next;
1311 } 1230 }
1312 *iclogp = log->l_iclog; /* complete ring */ 1231 *iclogp = log->l_iclog; /* complete ring */
@@ -1321,13 +1240,11 @@ out_free_iclog:
1321 sv_destroy(&iclog->ic_force_wait); 1240 sv_destroy(&iclog->ic_force_wait);
1322 sv_destroy(&iclog->ic_write_wait); 1241 sv_destroy(&iclog->ic_write_wait);
1323 xfs_buf_free(iclog->ic_bp); 1242 xfs_buf_free(iclog->ic_bp);
1324 xlog_trace_iclog_dealloc(iclog);
1325 } 1243 }
1326 kmem_free(iclog); 1244 kmem_free(iclog);
1327 } 1245 }
1328 spinlock_destroy(&log->l_icloglock); 1246 spinlock_destroy(&log->l_icloglock);
1329 spinlock_destroy(&log->l_grant_lock); 1247 spinlock_destroy(&log->l_grant_lock);
1330 xlog_trace_loggrant_dealloc(log);
1331 xfs_buf_free(log->l_xbuf); 1248 xfs_buf_free(log->l_xbuf);
1332out_free_log: 1249out_free_log:
1333 kmem_free(log); 1250 kmem_free(log);
@@ -1524,6 +1441,7 @@ xlog_sync(xlog_t *log,
1524 XFS_BUF_ZEROFLAGS(bp); 1441 XFS_BUF_ZEROFLAGS(bp);
1525 XFS_BUF_BUSY(bp); 1442 XFS_BUF_BUSY(bp);
1526 XFS_BUF_ASYNC(bp); 1443 XFS_BUF_ASYNC(bp);
1444 bp->b_flags |= XBF_LOG_BUFFER;
1527 /* 1445 /*
1528 * Do an ordered write for the log block. 1446 * Do an ordered write for the log block.
1529 * Its unnecessary to flush the first split block in the log wrap case. 1447 * Its unnecessary to flush the first split block in the log wrap case.
@@ -1561,6 +1479,7 @@ xlog_sync(xlog_t *log,
1561 XFS_BUF_ZEROFLAGS(bp); 1479 XFS_BUF_ZEROFLAGS(bp);
1562 XFS_BUF_BUSY(bp); 1480 XFS_BUF_BUSY(bp);
1563 XFS_BUF_ASYNC(bp); 1481 XFS_BUF_ASYNC(bp);
1482 bp->b_flags |= XBF_LOG_BUFFER;
1564 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1483 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1565 XFS_BUF_ORDERED(bp); 1484 XFS_BUF_ORDERED(bp);
1566 dptr = XFS_BUF_PTR(bp); 1485 dptr = XFS_BUF_PTR(bp);
@@ -1607,7 +1526,6 @@ xlog_dealloc_log(xlog_t *log)
1607 sv_destroy(&iclog->ic_force_wait); 1526 sv_destroy(&iclog->ic_force_wait);
1608 sv_destroy(&iclog->ic_write_wait); 1527 sv_destroy(&iclog->ic_write_wait);
1609 xfs_buf_free(iclog->ic_bp); 1528 xfs_buf_free(iclog->ic_bp);
1610 xlog_trace_iclog_dealloc(iclog);
1611 next_iclog = iclog->ic_next; 1529 next_iclog = iclog->ic_next;
1612 kmem_free(iclog); 1530 kmem_free(iclog);
1613 iclog = next_iclog; 1531 iclog = next_iclog;
@@ -1616,7 +1534,6 @@ xlog_dealloc_log(xlog_t *log)
1616 spinlock_destroy(&log->l_grant_lock); 1534 spinlock_destroy(&log->l_grant_lock);
1617 1535
1618 xfs_buf_free(log->l_xbuf); 1536 xfs_buf_free(log->l_xbuf);
1619 xlog_trace_loggrant_dealloc(log);
1620 log->l_mp->m_log = NULL; 1537 log->l_mp->m_log = NULL;
1621 kmem_free(log); 1538 kmem_free(log);
1622} /* xlog_dealloc_log */ 1539} /* xlog_dealloc_log */
@@ -2414,7 +2331,6 @@ restart:
2414 2331
2415 iclog = log->l_iclog; 2332 iclog = log->l_iclog;
2416 if (iclog->ic_state != XLOG_STATE_ACTIVE) { 2333 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2417 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
2418 XFS_STATS_INC(xs_log_noiclogs); 2334 XFS_STATS_INC(xs_log_noiclogs);
2419 2335
2420 /* Wait for log writes to have flushed */ 2336 /* Wait for log writes to have flushed */
@@ -2520,13 +2436,15 @@ xlog_grant_log_space(xlog_t *log,
2520 2436
2521 /* Is there space or do we need to sleep? */ 2437 /* Is there space or do we need to sleep? */
2522 spin_lock(&log->l_grant_lock); 2438 spin_lock(&log->l_grant_lock);
2523 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter"); 2439
2440 trace_xfs_log_grant_enter(log, tic);
2524 2441
2525 /* something is already sleeping; insert new transaction at end */ 2442 /* something is already sleeping; insert new transaction at end */
2526 if (log->l_reserve_headq) { 2443 if (log->l_reserve_headq) {
2527 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2444 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2528 xlog_trace_loggrant(log, tic, 2445
2529 "xlog_grant_log_space: sleep 1"); 2446 trace_xfs_log_grant_sleep1(log, tic);
2447
2530 /* 2448 /*
2531 * Gotta check this before going to sleep, while we're 2449 * Gotta check this before going to sleep, while we're
2532 * holding the grant lock. 2450 * holding the grant lock.
@@ -2540,8 +2458,7 @@ xlog_grant_log_space(xlog_t *log,
2540 * If we got an error, and the filesystem is shutting down, 2458 * If we got an error, and the filesystem is shutting down,
2541 * we'll catch it down below. So just continue... 2459 * we'll catch it down below. So just continue...
2542 */ 2460 */
2543 xlog_trace_loggrant(log, tic, 2461 trace_xfs_log_grant_wake1(log, tic);
2544 "xlog_grant_log_space: wake 1");
2545 spin_lock(&log->l_grant_lock); 2462 spin_lock(&log->l_grant_lock);
2546 } 2463 }
2547 if (tic->t_flags & XFS_LOG_PERM_RESERV) 2464 if (tic->t_flags & XFS_LOG_PERM_RESERV)
@@ -2558,8 +2475,9 @@ redo:
2558 if (free_bytes < need_bytes) { 2475 if (free_bytes < need_bytes) {
2559 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2476 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2560 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2477 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2561 xlog_trace_loggrant(log, tic, 2478
2562 "xlog_grant_log_space: sleep 2"); 2479 trace_xfs_log_grant_sleep2(log, tic);
2480
2563 spin_unlock(&log->l_grant_lock); 2481 spin_unlock(&log->l_grant_lock);
2564 xlog_grant_push_ail(log->l_mp, need_bytes); 2482 xlog_grant_push_ail(log->l_mp, need_bytes);
2565 spin_lock(&log->l_grant_lock); 2483 spin_lock(&log->l_grant_lock);
@@ -2571,8 +2489,8 @@ redo:
2571 if (XLOG_FORCED_SHUTDOWN(log)) 2489 if (XLOG_FORCED_SHUTDOWN(log))
2572 goto error_return; 2490 goto error_return;
2573 2491
2574 xlog_trace_loggrant(log, tic, 2492 trace_xfs_log_grant_wake2(log, tic);
2575 "xlog_grant_log_space: wake 2"); 2493
2576 goto redo; 2494 goto redo;
2577 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2495 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2578 xlog_del_ticketq(&log->l_reserve_headq, tic); 2496 xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2592,7 +2510,7 @@ redo:
2592 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); 2510 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2593 } 2511 }
2594#endif 2512#endif
2595 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit"); 2513 trace_xfs_log_grant_exit(log, tic);
2596 xlog_verify_grant_head(log, 1); 2514 xlog_verify_grant_head(log, 1);
2597 spin_unlock(&log->l_grant_lock); 2515 spin_unlock(&log->l_grant_lock);
2598 return 0; 2516 return 0;
@@ -2600,7 +2518,9 @@ redo:
2600 error_return: 2518 error_return:
2601 if (tic->t_flags & XLOG_TIC_IN_Q) 2519 if (tic->t_flags & XLOG_TIC_IN_Q)
2602 xlog_del_ticketq(&log->l_reserve_headq, tic); 2520 xlog_del_ticketq(&log->l_reserve_headq, tic);
2603 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret"); 2521
2522 trace_xfs_log_grant_error(log, tic);
2523
2604 /* 2524 /*
2605 * If we are failing, make sure the ticket doesn't have any 2525 * If we are failing, make sure the ticket doesn't have any
2606 * current reservations. We don't want to add this back when 2526 * current reservations. We don't want to add this back when
@@ -2640,7 +2560,8 @@ xlog_regrant_write_log_space(xlog_t *log,
2640#endif 2560#endif
2641 2561
2642 spin_lock(&log->l_grant_lock); 2562 spin_lock(&log->l_grant_lock);
2643 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter"); 2563
2564 trace_xfs_log_regrant_write_enter(log, tic);
2644 2565
2645 if (XLOG_FORCED_SHUTDOWN(log)) 2566 if (XLOG_FORCED_SHUTDOWN(log))
2646 goto error_return; 2567 goto error_return;
@@ -2669,8 +2590,8 @@ xlog_regrant_write_log_space(xlog_t *log,
2669 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2590 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2670 xlog_ins_ticketq(&log->l_write_headq, tic); 2591 xlog_ins_ticketq(&log->l_write_headq, tic);
2671 2592
2672 xlog_trace_loggrant(log, tic, 2593 trace_xfs_log_regrant_write_sleep1(log, tic);
2673 "xlog_regrant_write_log_space: sleep 1"); 2594
2674 spin_unlock(&log->l_grant_lock); 2595 spin_unlock(&log->l_grant_lock);
2675 xlog_grant_push_ail(log->l_mp, need_bytes); 2596 xlog_grant_push_ail(log->l_mp, need_bytes);
2676 spin_lock(&log->l_grant_lock); 2597 spin_lock(&log->l_grant_lock);
@@ -2685,8 +2606,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2685 if (XLOG_FORCED_SHUTDOWN(log)) 2606 if (XLOG_FORCED_SHUTDOWN(log))
2686 goto error_return; 2607 goto error_return;
2687 2608
2688 xlog_trace_loggrant(log, tic, 2609 trace_xfs_log_regrant_write_wake1(log, tic);
2689 "xlog_regrant_write_log_space: wake 1");
2690 } 2610 }
2691 } 2611 }
2692 2612
@@ -2704,6 +2624,8 @@ redo:
2704 spin_lock(&log->l_grant_lock); 2624 spin_lock(&log->l_grant_lock);
2705 2625
2706 XFS_STATS_INC(xs_sleep_logspace); 2626 XFS_STATS_INC(xs_sleep_logspace);
2627 trace_xfs_log_regrant_write_sleep2(log, tic);
2628
2707 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2629 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2708 2630
2709 /* If we're shutting down, this tic is already off the queue */ 2631 /* If we're shutting down, this tic is already off the queue */
@@ -2711,8 +2633,7 @@ redo:
2711 if (XLOG_FORCED_SHUTDOWN(log)) 2633 if (XLOG_FORCED_SHUTDOWN(log))
2712 goto error_return; 2634 goto error_return;
2713 2635
2714 xlog_trace_loggrant(log, tic, 2636 trace_xfs_log_regrant_write_wake2(log, tic);
2715 "xlog_regrant_write_log_space: wake 2");
2716 goto redo; 2637 goto redo;
2717 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2638 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2718 xlog_del_ticketq(&log->l_write_headq, tic); 2639 xlog_del_ticketq(&log->l_write_headq, tic);
@@ -2727,7 +2648,8 @@ redo:
2727 } 2648 }
2728#endif 2649#endif
2729 2650
2730 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); 2651 trace_xfs_log_regrant_write_exit(log, tic);
2652
2731 xlog_verify_grant_head(log, 1); 2653 xlog_verify_grant_head(log, 1);
2732 spin_unlock(&log->l_grant_lock); 2654 spin_unlock(&log->l_grant_lock);
2733 return 0; 2655 return 0;
@@ -2736,7 +2658,9 @@ redo:
2736 error_return: 2658 error_return:
2737 if (tic->t_flags & XLOG_TIC_IN_Q) 2659 if (tic->t_flags & XLOG_TIC_IN_Q)
2738 xlog_del_ticketq(&log->l_reserve_headq, tic); 2660 xlog_del_ticketq(&log->l_reserve_headq, tic);
2739 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret"); 2661
2662 trace_xfs_log_regrant_write_error(log, tic);
2663
2740 /* 2664 /*
2741 * If we are failing, make sure the ticket doesn't have any 2665 * If we are failing, make sure the ticket doesn't have any
2742 * current reservations. We don't want to add this back when 2666 * current reservations. We don't want to add this back when
@@ -2760,8 +2684,8 @@ STATIC void
2760xlog_regrant_reserve_log_space(xlog_t *log, 2684xlog_regrant_reserve_log_space(xlog_t *log,
2761 xlog_ticket_t *ticket) 2685 xlog_ticket_t *ticket)
2762{ 2686{
2763 xlog_trace_loggrant(log, ticket, 2687 trace_xfs_log_regrant_reserve_enter(log, ticket);
2764 "xlog_regrant_reserve_log_space: enter"); 2688
2765 if (ticket->t_cnt > 0) 2689 if (ticket->t_cnt > 0)
2766 ticket->t_cnt--; 2690 ticket->t_cnt--;
2767 2691
@@ -2769,8 +2693,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2769 xlog_grant_sub_space(log, ticket->t_curr_res); 2693 xlog_grant_sub_space(log, ticket->t_curr_res);
2770 ticket->t_curr_res = ticket->t_unit_res; 2694 ticket->t_curr_res = ticket->t_unit_res;
2771 xlog_tic_reset_res(ticket); 2695 xlog_tic_reset_res(ticket);
2772 xlog_trace_loggrant(log, ticket, 2696
2773 "xlog_regrant_reserve_log_space: sub current res"); 2697 trace_xfs_log_regrant_reserve_sub(log, ticket);
2698
2774 xlog_verify_grant_head(log, 1); 2699 xlog_verify_grant_head(log, 1);
2775 2700
2776 /* just return if we still have some of the pre-reserved space */ 2701 /* just return if we still have some of the pre-reserved space */
@@ -2780,8 +2705,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2780 } 2705 }
2781 2706
2782 xlog_grant_add_space_reserve(log, ticket->t_unit_res); 2707 xlog_grant_add_space_reserve(log, ticket->t_unit_res);
2783 xlog_trace_loggrant(log, ticket, 2708
2784 "xlog_regrant_reserve_log_space: exit"); 2709 trace_xfs_log_regrant_reserve_exit(log, ticket);
2710
2785 xlog_verify_grant_head(log, 0); 2711 xlog_verify_grant_head(log, 0);
2786 spin_unlock(&log->l_grant_lock); 2712 spin_unlock(&log->l_grant_lock);
2787 ticket->t_curr_res = ticket->t_unit_res; 2713 ticket->t_curr_res = ticket->t_unit_res;
@@ -2811,11 +2737,11 @@ xlog_ungrant_log_space(xlog_t *log,
2811 ticket->t_cnt--; 2737 ticket->t_cnt--;
2812 2738
2813 spin_lock(&log->l_grant_lock); 2739 spin_lock(&log->l_grant_lock);
2814 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); 2740 trace_xfs_log_ungrant_enter(log, ticket);
2815 2741
2816 xlog_grant_sub_space(log, ticket->t_curr_res); 2742 xlog_grant_sub_space(log, ticket->t_curr_res);
2817 2743
2818 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current"); 2744 trace_xfs_log_ungrant_sub(log, ticket);
2819 2745
2820 /* If this is a permanent reservation ticket, we may be able to free 2746 /* If this is a permanent reservation ticket, we may be able to free
2821 * up more space based on the remaining count. 2747 * up more space based on the remaining count.
@@ -2825,7 +2751,8 @@ xlog_ungrant_log_space(xlog_t *log,
2825 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); 2751 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
2826 } 2752 }
2827 2753
2828 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); 2754 trace_xfs_log_ungrant_exit(log, ticket);
2755
2829 xlog_verify_grant_head(log, 1); 2756 xlog_verify_grant_head(log, 1);
2830 spin_unlock(&log->l_grant_lock); 2757 spin_unlock(&log->l_grant_lock);
2831 xfs_log_move_tail(log->l_mp, 1); 2758 xfs_log_move_tail(log->l_mp, 1);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 679c7c4926a2..d55662db7077 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -19,7 +19,6 @@
19#define __XFS_LOG_PRIV_H__ 19#define __XFS_LOG_PRIV_H__
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct ktrace;
23struct log; 22struct log;
24struct xlog_ticket; 23struct xlog_ticket;
25struct xfs_buf_cancel; 24struct xfs_buf_cancel;
@@ -135,6 +134,12 @@ static inline uint xlog_get_client_id(__be32 i)
135#define XLOG_TIC_INITED 0x1 /* has been initialized */ 134#define XLOG_TIC_INITED 0x1 /* has been initialized */
136#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ 135#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
137#define XLOG_TIC_IN_Q 0x4 136#define XLOG_TIC_IN_Q 0x4
137
138#define XLOG_TIC_FLAGS \
139 { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
140 { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \
141 { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
142
138#endif /* __KERNEL__ */ 143#endif /* __KERNEL__ */
139 144
140#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ 145#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */
@@ -361,9 +366,6 @@ typedef struct xlog_in_core {
361 int ic_bwritecnt; 366 int ic_bwritecnt;
362 unsigned short ic_state; 367 unsigned short ic_state;
363 char *ic_datap; /* pointer to iclog data */ 368 char *ic_datap; /* pointer to iclog data */
364#ifdef XFS_LOG_TRACE
365 struct ktrace *ic_trace;
366#endif
367 369
368 /* Callback structures need their own cacheline */ 370 /* Callback structures need their own cacheline */
369 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; 371 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
@@ -429,10 +431,6 @@ typedef struct log {
429 int l_grant_write_cycle; 431 int l_grant_write_cycle;
430 int l_grant_write_bytes; 432 int l_grant_write_bytes;
431 433
432#ifdef XFS_LOG_TRACE
433 struct ktrace *l_grant_trace;
434#endif
435
436 /* The following field are used for debugging; need to hold icloglock */ 434 /* The following field are used for debugging; need to hold icloglock */
437#ifdef DEBUG 435#ifdef DEBUG
438 char *l_iclog_bak[XLOG_MAX_ICLOGS]; 436 char *l_iclog_bak[XLOG_MAX_ICLOGS];
@@ -456,12 +454,6 @@ extern void xlog_put_bp(struct xfs_buf *);
456 454
457extern kmem_zone_t *xfs_log_ticket_zone; 455extern kmem_zone_t *xfs_log_ticket_zone;
458 456
459/* iclog tracing */
460#define XLOG_TRACE_GRAB_FLUSH 1
461#define XLOG_TRACE_REL_FLUSH 2
462#define XLOG_TRACE_SLEEP_FLUSH 3
463#define XLOG_TRACE_WAKE_FLUSH 4
464
465/* 457/*
466 * Unmount record type is used as a pseudo transaction type for the ticket. 458 * Unmount record type is used as a pseudo transaction type for the ticket.
467 * It's value must be outside the range of XFS_TRANS_* values. 459 * It's value must be outside the range of XFS_TRANS_* values.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1ec98ed914d4..69ac2e5ef20c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -46,6 +46,7 @@
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_trace.h"
49 50
50STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
51STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
@@ -225,16 +226,10 @@ xlog_header_check_dump(
225 xfs_mount_t *mp, 226 xfs_mount_t *mp,
226 xlog_rec_header_t *head) 227 xlog_rec_header_t *head)
227{ 228{
228 int b; 229 cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n",
229 230 __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
230 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); 231 cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n",
231 for (b = 0; b < 16; b++) 232 &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
232 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]);
233 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
234 cmn_err(CE_DEBUG, " log : uuid = ");
235 for (b = 0; b < 16; b++)
236 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]);
237 cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
238} 233}
239#else 234#else
240#define xlog_header_check_dump(mp, head) 235#define xlog_header_check_dump(mp, head)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bfffd6334abb..eb403b40e120 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,6 +44,8 @@
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_fsops.h" 45#include "xfs_fsops.h"
46#include "xfs_utils.h" 46#include "xfs_utils.h"
47#include "xfs_trace.h"
48
47 49
48STATIC void xfs_unmountfs_wait(xfs_mount_t *); 50STATIC void xfs_unmountfs_wait(xfs_mount_t *);
49 51
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 3ec91ac74c2a..91bfd60f4c74 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -92,6 +92,14 @@ typedef struct xfs_dqblk {
92 92
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 94
95#define XFS_DQ_FLAGS \
96 { XFS_DQ_USER, "USER" }, \
97 { XFS_DQ_PROJ, "PROJ" }, \
98 { XFS_DQ_GROUP, "GROUP" }, \
99 { XFS_DQ_DIRTY, "DIRTY" }, \
100 { XFS_DQ_WANT, "WANT" }, \
101 { XFS_DQ_INACTIVE, "INACTIVE" }
102
95/* 103/*
96 * In the worst case, when both user and group quotas are on, 104 * In the worst case, when both user and group quotas are on,
97 * we can have a max of three dquots changing in a single transaction. 105 * we can have a max of three dquots changing in a single transaction.
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index b81deea0ce19..fc1cda23b817 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -39,6 +39,7 @@
39#include "xfs_utils.h" 39#include "xfs_utils.h"
40#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
41#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
42#include "xfs_trace.h"
42 43
43 44
44/* 45/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 385f6dceba5d..6be05f756d59 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -45,6 +45,7 @@
45#include "xfs_inode_item.h" 45#include "xfs_inode_item.h"
46#include "xfs_trans_space.h" 46#include "xfs_trans_space.h"
47#include "xfs_utils.h" 47#include "xfs_utils.h"
48#include "xfs_trace.h"
48 49
49 50
50/* 51/*
@@ -1516,6 +1517,8 @@ xfs_rtfree_range(
1516 */ 1517 */
1517 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, 1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
1518 &postblock); 1519 &postblock);
1520 if (error)
1521 return error;
1519 /* 1522 /*
1520 * If there are blocks not being freed at the front of the 1523 * If there are blocks not being freed at the front of the
1521 * old extent, add summary data for them to be allocated. 1524 * old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 4c199d18f850..5aa07caea5f1 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -44,6 +44,7 @@
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_buf_item.h" 45#include "xfs_buf_item.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47#include "xfs_trace.h"
47 48
48/* 49/*
49 * This is a subroutine for xfs_write() and other writers (xfs_ioctl) 50 * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
@@ -171,7 +172,6 @@ xfs_bioerror(
171 * No need to wait until the buffer is unpinned. 172 * No need to wait until the buffer is unpinned.
172 * We aren't flushing it. 173 * We aren't flushing it.
173 */ 174 */
174 xfs_buftrace("XFS IOERROR", bp);
175 XFS_BUF_ERROR(bp, EIO); 175 XFS_BUF_ERROR(bp, EIO);
176 /* 176 /*
177 * We're calling biodone, so delete B_DONE flag. Either way 177 * We're calling biodone, so delete B_DONE flag. Either way
@@ -205,7 +205,6 @@ xfs_bioerror_relse(
205 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks); 205 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
206 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone); 206 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
207 207
208 xfs_buftrace("XFS IOERRELSE", bp);
209 fl = XFS_BUF_BFLAGS(bp); 208 fl = XFS_BUF_BFLAGS(bp);
210 /* 209 /*
211 * No need to wait until the buffer is unpinned. 210 * No need to wait until the buffer is unpinned.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a0574f593f52..ca64f33c63a3 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -100,6 +100,49 @@ typedef struct xfs_trans_header {
100#define XFS_TRANS_TYPE_MAX 41 100#define XFS_TRANS_TYPE_MAX 41
101/* new transaction types need to be reflected in xfs_logprint(8) */ 101/* new transaction types need to be reflected in xfs_logprint(8) */
102 102
103#define XFS_TRANS_TYPES \
104 { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
105 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
106 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
107 { XFS_TRANS_CREATE, "CREATE" }, \
108 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
109 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
110 { XFS_TRANS_REMOVE, "REMOVE" }, \
111 { XFS_TRANS_LINK, "LINK" }, \
112 { XFS_TRANS_RENAME, "RENAME" }, \
113 { XFS_TRANS_MKDIR, "MKDIR" }, \
114 { XFS_TRANS_RMDIR, "RMDIR" }, \
115 { XFS_TRANS_SYMLINK, "SYMLINK" }, \
116 { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
117 { XFS_TRANS_GROWFS, "GROWFS" }, \
118 { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
119 { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
120 { XFS_TRANS_WRITEID, "WRITEID" }, \
121 { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
122 { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
123 { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
124 { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
125 { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
126 { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
127 { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
128 { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \
129 { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
130 { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
131 { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
132 { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
133 { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
134 { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
135 { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
136 { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
137 { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
138 { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
139 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
140 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
141 { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
142 { XFS_TRANS_DUMMY1, "DUMMY1" }, \
143 { XFS_TRANS_DUMMY2, "DUMMY2" }, \
144 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
145
103/* 146/*
104 * This structure is used to track log items associated with 147 * This structure is used to track log items associated with
105 * a transaction. It points to the log item and keeps some 148 * a transaction. It points to the log item and keeps some
@@ -782,6 +825,10 @@ typedef struct xfs_log_item {
782#define XFS_LI_IN_AIL 0x1 825#define XFS_LI_IN_AIL 0x1
783#define XFS_LI_ABORTED 0x2 826#define XFS_LI_ABORTED 0x2
784 827
828#define XFS_LI_FLAGS \
829 { XFS_LI_IN_AIL, "IN_AIL" }, \
830 { XFS_LI_ABORTED, "ABORTED" }
831
785typedef struct xfs_item_ops { 832typedef struct xfs_item_ops {
786 uint (*iop_size)(xfs_log_item_t *); 833 uint (*iop_size)(xfs_log_item_t *);
787 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 834 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 03a1f701fea8..49130628d5ef 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -38,6 +38,7 @@
38#include "xfs_trans_priv.h" 38#include "xfs_trans_priv.h"
39#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_rw.h" 40#include "xfs_rw.h"
41#include "xfs_trace.h"
41 42
42 43
43STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, 44STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
@@ -95,26 +96,23 @@ xfs_trans_get_buf(xfs_trans_t *tp,
95 } 96 }
96 if (bp != NULL) { 97 if (bp != NULL) {
97 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 98 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
98 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { 99 if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
99 xfs_buftrace("TRANS GET RECUR SHUT", bp);
100 XFS_BUF_SUPER_STALE(bp); 100 XFS_BUF_SUPER_STALE(bp);
101 } 101
102 /* 102 /*
103 * If the buffer is stale then it was binval'ed 103 * If the buffer is stale then it was binval'ed
104 * since last read. This doesn't matter since the 104 * since last read. This doesn't matter since the
105 * caller isn't allowed to use the data anyway. 105 * caller isn't allowed to use the data anyway.
106 */ 106 */
107 else if (XFS_BUF_ISSTALE(bp)) { 107 else if (XFS_BUF_ISSTALE(bp))
108 xfs_buftrace("TRANS GET RECUR STALE", bp);
109 ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); 108 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
110 } 109
111 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 110 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
112 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 111 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
113 ASSERT(bip != NULL); 112 ASSERT(bip != NULL);
114 ASSERT(atomic_read(&bip->bli_refcount) > 0); 113 ASSERT(atomic_read(&bip->bli_refcount) > 0);
115 bip->bli_recur++; 114 bip->bli_recur++;
116 xfs_buftrace("TRANS GET RECUR", bp); 115 trace_xfs_trans_get_buf_recur(bip);
117 xfs_buf_item_trace("GET RECUR", bip);
118 return (bp); 116 return (bp);
119 } 117 }
120 118
@@ -166,8 +164,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
166 */ 164 */
167 XFS_BUF_SET_FSPRIVATE2(bp, tp); 165 XFS_BUF_SET_FSPRIVATE2(bp, tp);
168 166
169 xfs_buftrace("TRANS GET", bp); 167 trace_xfs_trans_get_buf(bip);
170 xfs_buf_item_trace("GET", bip);
171 return (bp); 168 return (bp);
172} 169}
173 170
@@ -207,7 +204,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
207 ASSERT(bip != NULL); 204 ASSERT(bip != NULL);
208 ASSERT(atomic_read(&bip->bli_refcount) > 0); 205 ASSERT(atomic_read(&bip->bli_refcount) > 0);
209 bip->bli_recur++; 206 bip->bli_recur++;
210 xfs_buf_item_trace("GETSB RECUR", bip); 207 trace_xfs_trans_getsb_recur(bip);
211 return (bp); 208 return (bp);
212 } 209 }
213 210
@@ -249,7 +246,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
249 */ 246 */
250 XFS_BUF_SET_FSPRIVATE2(bp, tp); 247 XFS_BUF_SET_FSPRIVATE2(bp, tp);
251 248
252 xfs_buf_item_trace("GETSB", bip); 249 trace_xfs_trans_getsb(bip);
253 return (bp); 250 return (bp);
254} 251}
255 252
@@ -347,7 +344,7 @@ xfs_trans_read_buf(
347 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 344 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
348 ASSERT((XFS_BUF_ISERROR(bp)) == 0); 345 ASSERT((XFS_BUF_ISERROR(bp)) == 0);
349 if (!(XFS_BUF_ISDONE(bp))) { 346 if (!(XFS_BUF_ISDONE(bp))) {
350 xfs_buftrace("READ_BUF_INCORE !DONE", bp); 347 trace_xfs_trans_read_buf_io(bp, _RET_IP_);
351 ASSERT(!XFS_BUF_ISASYNC(bp)); 348 ASSERT(!XFS_BUF_ISASYNC(bp));
352 XFS_BUF_READ(bp); 349 XFS_BUF_READ(bp);
353 xfsbdstrat(tp->t_mountp, bp); 350 xfsbdstrat(tp->t_mountp, bp);
@@ -372,7 +369,7 @@ xfs_trans_read_buf(
372 * brelse it either. Just get out. 369 * brelse it either. Just get out.
373 */ 370 */
374 if (XFS_FORCED_SHUTDOWN(mp)) { 371 if (XFS_FORCED_SHUTDOWN(mp)) {
375 xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp); 372 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
376 *bpp = NULL; 373 *bpp = NULL;
377 return XFS_ERROR(EIO); 374 return XFS_ERROR(EIO);
378 } 375 }
@@ -382,7 +379,7 @@ xfs_trans_read_buf(
382 bip->bli_recur++; 379 bip->bli_recur++;
383 380
384 ASSERT(atomic_read(&bip->bli_refcount) > 0); 381 ASSERT(atomic_read(&bip->bli_refcount) > 0);
385 xfs_buf_item_trace("READ RECUR", bip); 382 trace_xfs_trans_read_buf_recur(bip);
386 *bpp = bp; 383 *bpp = bp;
387 return 0; 384 return 0;
388 } 385 }
@@ -402,7 +399,6 @@ xfs_trans_read_buf(
402 } 399 }
403 if (XFS_BUF_GETERROR(bp) != 0) { 400 if (XFS_BUF_GETERROR(bp) != 0) {
404 XFS_BUF_SUPER_STALE(bp); 401 XFS_BUF_SUPER_STALE(bp);
405 xfs_buftrace("READ ERROR", bp);
406 error = XFS_BUF_GETERROR(bp); 402 error = XFS_BUF_GETERROR(bp);
407 403
408 xfs_ioerror_alert("xfs_trans_read_buf", mp, 404 xfs_ioerror_alert("xfs_trans_read_buf", mp,
@@ -461,8 +457,7 @@ xfs_trans_read_buf(
461 */ 457 */
462 XFS_BUF_SET_FSPRIVATE2(bp, tp); 458 XFS_BUF_SET_FSPRIVATE2(bp, tp);
463 459
464 xfs_buftrace("TRANS READ", bp); 460 trace_xfs_trans_read_buf(bip);
465 xfs_buf_item_trace("READ", bip);
466 *bpp = bp; 461 *bpp = bp;
467 return 0; 462 return 0;
468 463
@@ -480,7 +475,7 @@ shutdown_abort:
480 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != 475 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
481 (XFS_B_STALE|XFS_B_DELWRI)); 476 (XFS_B_STALE|XFS_B_DELWRI));
482 477
483 xfs_buftrace("READ_BUF XFSSHUTDN", bp); 478 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
484 xfs_buf_relse(bp); 479 xfs_buf_relse(bp);
485 *bpp = NULL; 480 *bpp = NULL;
486 return XFS_ERROR(EIO); 481 return XFS_ERROR(EIO);
@@ -546,13 +541,14 @@ xfs_trans_brelse(xfs_trans_t *tp,
546 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); 541 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
547 ASSERT(lidp != NULL); 542 ASSERT(lidp != NULL);
548 543
544 trace_xfs_trans_brelse(bip);
545
549 /* 546 /*
550 * If the release is just for a recursive lock, 547 * If the release is just for a recursive lock,
551 * then decrement the count and return. 548 * then decrement the count and return.
552 */ 549 */
553 if (bip->bli_recur > 0) { 550 if (bip->bli_recur > 0) {
554 bip->bli_recur--; 551 bip->bli_recur--;
555 xfs_buf_item_trace("RELSE RECUR", bip);
556 return; 552 return;
557 } 553 }
558 554
@@ -560,10 +556,8 @@ xfs_trans_brelse(xfs_trans_t *tp,
560 * If the buffer is dirty within this transaction, we can't 556 * If the buffer is dirty within this transaction, we can't
561 * release it until we commit. 557 * release it until we commit.
562 */ 558 */
563 if (lidp->lid_flags & XFS_LID_DIRTY) { 559 if (lidp->lid_flags & XFS_LID_DIRTY)
564 xfs_buf_item_trace("RELSE DIRTY", bip);
565 return; 560 return;
566 }
567 561
568 /* 562 /*
569 * If the buffer has been invalidated, then we can't release 563 * If the buffer has been invalidated, then we can't release
@@ -571,13 +565,10 @@ xfs_trans_brelse(xfs_trans_t *tp,
571 * as part of this transaction. This prevents us from pulling 565 * as part of this transaction. This prevents us from pulling
572 * the item from the AIL before we should. 566 * the item from the AIL before we should.
573 */ 567 */
574 if (bip->bli_flags & XFS_BLI_STALE) { 568 if (bip->bli_flags & XFS_BLI_STALE)
575 xfs_buf_item_trace("RELSE STALE", bip);
576 return; 569 return;
577 }
578 570
579 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 571 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
580 xfs_buf_item_trace("RELSE", bip);
581 572
582 /* 573 /*
583 * Free up the log item descriptor tracking the released item. 574 * Free up the log item descriptor tracking the released item.
@@ -674,7 +665,7 @@ xfs_trans_bjoin(xfs_trans_t *tp,
674 */ 665 */
675 XFS_BUF_SET_FSPRIVATE2(bp, tp); 666 XFS_BUF_SET_FSPRIVATE2(bp, tp);
676 667
677 xfs_buf_item_trace("BJOIN", bip); 668 trace_xfs_trans_bjoin(bip);
678} 669}
679 670
680/* 671/*
@@ -698,7 +689,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
698 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 689 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
699 ASSERT(atomic_read(&bip->bli_refcount) > 0); 690 ASSERT(atomic_read(&bip->bli_refcount) > 0);
700 bip->bli_flags |= XFS_BLI_HOLD; 691 bip->bli_flags |= XFS_BLI_HOLD;
701 xfs_buf_item_trace("BHOLD", bip); 692 trace_xfs_trans_bhold(bip);
702} 693}
703 694
704/* 695/*
@@ -721,7 +712,8 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
721 ASSERT(atomic_read(&bip->bli_refcount) > 0); 712 ASSERT(atomic_read(&bip->bli_refcount) > 0);
722 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 713 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
723 bip->bli_flags &= ~XFS_BLI_HOLD; 714 bip->bli_flags &= ~XFS_BLI_HOLD;
724 xfs_buf_item_trace("BHOLD RELEASE", bip); 715
716 trace_xfs_trans_bhold_release(bip);
725} 717}
726 718
727/* 719/*
@@ -767,6 +759,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
767 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 759 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
768 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; 760 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone;
769 761
762 trace_xfs_trans_log_buf(bip);
763
770 /* 764 /*
771 * If we invalidated the buffer within this transaction, then 765 * If we invalidated the buffer within this transaction, then
772 * cancel the invalidation now that we're dirtying the buffer 766 * cancel the invalidation now that we're dirtying the buffer
@@ -774,7 +768,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
774 * because we have a reference to the buffer this entire time. 768 * because we have a reference to the buffer this entire time.
775 */ 769 */
776 if (bip->bli_flags & XFS_BLI_STALE) { 770 if (bip->bli_flags & XFS_BLI_STALE) {
777 xfs_buf_item_trace("BLOG UNSTALE", bip);
778 bip->bli_flags &= ~XFS_BLI_STALE; 771 bip->bli_flags &= ~XFS_BLI_STALE;
779 ASSERT(XFS_BUF_ISSTALE(bp)); 772 ASSERT(XFS_BUF_ISSTALE(bp));
780 XFS_BUF_UNSTALE(bp); 773 XFS_BUF_UNSTALE(bp);
@@ -789,7 +782,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
789 lidp->lid_flags &= ~XFS_LID_BUF_STALE; 782 lidp->lid_flags &= ~XFS_LID_BUF_STALE;
790 bip->bli_flags |= XFS_BLI_LOGGED; 783 bip->bli_flags |= XFS_BLI_LOGGED;
791 xfs_buf_item_log(bip, first, last); 784 xfs_buf_item_log(bip, first, last);
792 xfs_buf_item_trace("BLOG", bip);
793} 785}
794 786
795 787
@@ -828,6 +820,8 @@ xfs_trans_binval(
828 ASSERT(lidp != NULL); 820 ASSERT(lidp != NULL);
829 ASSERT(atomic_read(&bip->bli_refcount) > 0); 821 ASSERT(atomic_read(&bip->bli_refcount) > 0);
830 822
823 trace_xfs_trans_binval(bip);
824
831 if (bip->bli_flags & XFS_BLI_STALE) { 825 if (bip->bli_flags & XFS_BLI_STALE) {
832 /* 826 /*
833 * If the buffer is already invalidated, then 827 * If the buffer is already invalidated, then
@@ -840,8 +834,6 @@ xfs_trans_binval(
840 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 834 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
841 ASSERT(lidp->lid_flags & XFS_LID_DIRTY); 835 ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
842 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 836 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
843 xfs_buftrace("XFS_BINVAL RECUR", bp);
844 xfs_buf_item_trace("BINVAL RECUR", bip);
845 return; 837 return;
846 } 838 }
847 839
@@ -875,8 +867,6 @@ xfs_trans_binval(
875 (bip->bli_format.blf_map_size * sizeof(uint))); 867 (bip->bli_format.blf_map_size * sizeof(uint)));
876 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; 868 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
877 tp->t_flags |= XFS_TRANS_DIRTY; 869 tp->t_flags |= XFS_TRANS_DIRTY;
878 xfs_buftrace("XFS_BINVAL", bp);
879 xfs_buf_item_trace("BINVAL", bip);
880} 870}
881 871
882/* 872/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 578f3f59b789..6f268756bf36 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -53,6 +53,7 @@
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_filestream.h" 54#include "xfs_filestream.h"
55#include "xfs_vnodeops.h" 55#include "xfs_vnodeops.h"
56#include "xfs_trace.h"
56 57
57int 58int
58xfs_setattr( 59xfs_setattr(
@@ -69,7 +70,6 @@ xfs_setattr(
69 uint commit_flags=0; 70 uint commit_flags=0;
70 uid_t uid=0, iuid=0; 71 uid_t uid=0, iuid=0;
71 gid_t gid=0, igid=0; 72 gid_t gid=0, igid=0;
72 int timeflags = 0;
73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
74 int need_iolock = 1; 74 int need_iolock = 1;
75 75
@@ -134,16 +134,13 @@ xfs_setattr(
134 if (flags & XFS_ATTR_NOLOCK) 134 if (flags & XFS_ATTR_NOLOCK)
135 need_iolock = 0; 135 need_iolock = 0;
136 if (!(mask & ATTR_SIZE)) { 136 if (!(mask & ATTR_SIZE)) {
137 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || 137 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
138 (mp->m_flags & XFS_MOUNT_WSYNC)) { 138 commit_flags = 0;
139 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 139 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
140 commit_flags = 0; 140 0, 0, 0);
141 if ((code = xfs_trans_reserve(tp, 0, 141 if (code) {
142 XFS_ICHANGE_LOG_RES(mp), 0, 142 lock_flags = 0;
143 0, 0))) { 143 goto error_return;
144 lock_flags = 0;
145 goto error_return;
146 }
147 } 144 }
148 } else { 145 } else {
149 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -294,15 +291,23 @@ xfs_setattr(
294 * or we are explicitly asked to change it. This handles 291 * or we are explicitly asked to change it. This handles
295 * the semantic difference between truncate() and ftruncate() 292 * the semantic difference between truncate() and ftruncate()
296 * as implemented in the VFS. 293 * as implemented in the VFS.
294 *
295 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
296 * is a special case where we need to update the times despite
297 * not having these flags set. For all other operations the
298 * VFS set these flags explicitly if it wants a timestamp
299 * update.
297 */ 300 */
298 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) 301 if (iattr->ia_size != ip->i_size &&
299 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 302 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
303 iattr->ia_ctime = iattr->ia_mtime =
304 current_fs_time(inode->i_sb);
305 mask |= ATTR_CTIME | ATTR_MTIME;
306 }
300 307
301 if (iattr->ia_size > ip->i_size) { 308 if (iattr->ia_size > ip->i_size) {
302 ip->i_d.di_size = iattr->ia_size; 309 ip->i_d.di_size = iattr->ia_size;
303 ip->i_size = iattr->ia_size; 310 ip->i_size = iattr->ia_size;
304 if (!(flags & XFS_ATTR_DMI))
305 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
306 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 311 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
307 } else if (iattr->ia_size <= ip->i_size || 312 } else if (iattr->ia_size <= ip->i_size ||
308 (iattr->ia_size == 0 && ip->i_d.di_nextents)) { 313 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -373,9 +378,6 @@ xfs_setattr(
373 ip->i_d.di_gid = gid; 378 ip->i_d.di_gid = gid;
374 inode->i_gid = gid; 379 inode->i_gid = gid;
375 } 380 }
376
377 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
378 timeflags |= XFS_ICHGTIME_CHG;
379 } 381 }
380 382
381 /* 383 /*
@@ -392,51 +394,37 @@ xfs_setattr(
392 394
393 inode->i_mode &= S_IFMT; 395 inode->i_mode &= S_IFMT;
394 inode->i_mode |= mode & ~S_IFMT; 396 inode->i_mode |= mode & ~S_IFMT;
395
396 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
397 timeflags |= XFS_ICHGTIME_CHG;
398 } 397 }
399 398
400 /* 399 /*
401 * Change file access or modified times. 400 * Change file access or modified times.
402 */ 401 */
403 if (mask & (ATTR_ATIME|ATTR_MTIME)) { 402 if (mask & ATTR_ATIME) {
404 if (mask & ATTR_ATIME) { 403 inode->i_atime = iattr->ia_atime;
405 inode->i_atime = iattr->ia_atime; 404 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
406 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 405 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
407 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 406 ip->i_update_core = 1;
408 ip->i_update_core = 1;
409 }
410 if (mask & ATTR_MTIME) {
411 inode->i_mtime = iattr->ia_mtime;
412 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
413 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
414 timeflags &= ~XFS_ICHGTIME_MOD;
415 timeflags |= XFS_ICHGTIME_CHG;
416 }
417 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
418 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
419 } 407 }
420 408 if (mask & ATTR_CTIME) {
421 /*
422 * Change file inode change time only if ATTR_CTIME set
423 * AND we have been called by a DMI function.
424 */
425
426 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
427 inode->i_ctime = iattr->ia_ctime; 409 inode->i_ctime = iattr->ia_ctime;
428 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 410 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
429 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 411 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
430 ip->i_update_core = 1; 412 ip->i_update_core = 1;
431 timeflags &= ~XFS_ICHGTIME_CHG; 413 }
414 if (mask & ATTR_MTIME) {
415 inode->i_mtime = iattr->ia_mtime;
416 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
417 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
418 ip->i_update_core = 1;
432 } 419 }
433 420
434 /* 421 /*
435 * Send out timestamp changes that need to be set to the 422 * And finally, log the inode core if any attribute in it
436 * current time. Not done when called by a DMI function. 423 * has been changed.
437 */ 424 */
438 if (timeflags && !(flags & XFS_ATTR_DMI)) 425 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
439 xfs_ichgtime(ip, timeflags); 426 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
427 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
440 428
441 XFS_STATS_INC(xs_ig_attrchg); 429 XFS_STATS_INC(xs_ig_attrchg);
442 430
@@ -451,12 +439,10 @@ xfs_setattr(
451 * mix so this probably isn't worth the trouble to optimize. 439 * mix so this probably isn't worth the trouble to optimize.
452 */ 440 */
453 code = 0; 441 code = 0;
454 if (tp) { 442 if (mp->m_flags & XFS_MOUNT_WSYNC)
455 if (mp->m_flags & XFS_MOUNT_WSYNC) 443 xfs_trans_set_sync(tp);
456 xfs_trans_set_sync(tp);
457 444
458 code = xfs_trans_commit(tp, commit_flags); 445 code = xfs_trans_commit(tp, commit_flags);
459 }
460 446
461 xfs_iunlock(ip, lock_flags); 447 xfs_iunlock(ip, lock_flags);
462 448
@@ -1397,7 +1383,6 @@ xfs_lookup(
1397 if (error) 1383 if (error)
1398 goto out_free_name; 1384 goto out_free_name;
1399 1385
1400 xfs_itrace_ref(*ipp);
1401 return 0; 1386 return 0;
1402 1387
1403out_free_name: 1388out_free_name:
@@ -1543,7 +1528,6 @@ xfs_create(
1543 * At this point, we've gotten a newly allocated inode. 1528 * At this point, we've gotten a newly allocated inode.
1544 * It is locked (and joined to the transaction). 1529 * It is locked (and joined to the transaction).
1545 */ 1530 */
1546 xfs_itrace_ref(ip);
1547 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1531 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1548 1532
1549 /* 1533 /*
@@ -2003,9 +1987,6 @@ xfs_remove(
2003 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1987 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
2004 xfs_filestream_deassociate(ip); 1988 xfs_filestream_deassociate(ip);
2005 1989
2006 xfs_itrace_exit(ip);
2007 xfs_itrace_exit(dp);
2008
2009 std_return: 1990 std_return:
2010 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 1991 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2011 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL, 1992 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
@@ -2302,7 +2283,6 @@ xfs_symlink(
2302 goto error_return; 2283 goto error_return;
2303 goto error1; 2284 goto error1;
2304 } 2285 }
2305 xfs_itrace_ref(ip);
2306 2286
2307 /* 2287 /*
2308 * An error after we've joined dp to the transaction will result in the 2288 * An error after we've joined dp to the transaction will result in the
@@ -2845,7 +2825,6 @@ xfs_free_file_space(
2845 ioffset = offset & ~(rounding - 1); 2825 ioffset = offset & ~(rounding - 1);
2846 2826
2847 if (VN_CACHED(VFS_I(ip)) != 0) { 2827 if (VN_CACHED(VFS_I(ip)) != 0) {
2848 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
2849 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); 2828 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
2850 if (error) 2829 if (error)
2851 goto out_unlock_iolock; 2830 goto out_unlock_iolock;