aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c6
-rw-r--r--fs/9p/fid.c19
-rw-r--r--fs/9p/v9fs.h6
-rw-r--r--fs/9p/vfs_file.c13
-rw-r--r--fs/9p/vfs_inode.c29
-rw-r--r--fs/9p/vfs_inode_dotl.c22
-rw-r--r--fs/9p/vfs_super.c2
-rw-r--r--fs/adfs/adfs.h25
-rw-r--r--fs/adfs/dir_f.c23
-rw-r--r--fs/adfs/dir_fplus.c119
-rw-r--r--fs/adfs/inode.c63
-rw-r--r--fs/adfs/super.c23
-rw-r--r--fs/affs/Makefile2
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/aio.c81
-rw-r--r--fs/attr.c4
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/dev-ioctl.c4
-rw-r--r--fs/autofs4/expire.c84
-rw-r--r--fs/autofs4/root.c62
-rw-r--r--fs/autofs4/waitq.c6
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/file.c1
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/bio-integrity.c3
-rw-r--r--fs/bio.c12
-rw-r--r--fs/block_dev.c33
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/compression.c17
-rw-r--r--fs/btrfs/ctree.c159
-rw-r--r--fs/btrfs/ctree.h19
-rw-r--r--fs/btrfs/delayed-ref.c6
-rw-r--r--fs/btrfs/dir-item.c45
-rw-r--r--fs/btrfs/disk-io.c211
-rw-r--r--fs/btrfs/extent-tree.c229
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/file.c388
-rw-r--r--fs/btrfs/free-space-cache.c510
-rw-r--r--fs/btrfs/free-space-cache.h2
-rw-r--r--fs/btrfs/inode-map.c3
-rw-r--r--fs/btrfs/inode.c367
-rw-r--r--fs/btrfs/ioctl.c104
-rw-r--r--fs/btrfs/ordered-data.c8
-rw-r--r--fs/btrfs/relocation.c8
-rw-r--r--fs/btrfs/root-tree.c6
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/btrfs/transaction.c14
-rw-r--r--fs/btrfs/tree-log.c57
-rw-r--r--fs/btrfs/volumes.c235
-rw-r--r--fs/btrfs/volumes.h12
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/zlib.c3
-rw-r--r--fs/buffer.c53
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/debugfs.c6
-rw-r--r--fs/ceph/dir.c24
-rw-r--r--fs/ceph/file.c10
-rw-r--r--fs/ceph/inode.c25
-rw-r--r--fs/ceph/snap.c4
-rw-r--r--fs/ceph/super.c9
-rw-r--r--fs/ceph/super.h66
-rw-r--r--fs/cifs/file.c30
-rw-r--r--fs/coda/Makefile2
-rw-r--r--fs/coda/sysctl.c17
-rw-r--r--fs/compat.c3
-rw-r--r--fs/devpts/inode.c21
-rw-r--r--fs/direct-io.c7
-rw-r--r--fs/drop_caches.c24
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h30
-rw-r--r--fs/ecryptfs/file.c9
-rw-r--r--fs/ecryptfs/inode.c24
-rw-r--r--fs/ecryptfs/keystore.c272
-rw-r--r--fs/ecryptfs/main.c10
-rw-r--r--fs/ecryptfs/mmap.c61
-rw-r--r--fs/ecryptfs/read_write.c12
-rw-r--r--fs/ecryptfs/super.c3
-rw-r--r--fs/efs/inode.c1
-rw-r--r--fs/eventpoll.c52
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exofs/common.h18
-rw-r--r--fs/exofs/dir.c33
-rw-r--r--fs/exofs/exofs.h6
-rw-r--r--fs/exofs/file.c16
-rw-r--r--fs/exofs/inode.c52
-rw-r--r--fs/exofs/super.c190
-rw-r--r--fs/ext2/acl.c2
-rw-r--r--fs/ext2/ext2.h6
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/ioctl.c6
-rw-r--r--fs/ext3/acl.c2
-rw-r--r--fs/ext3/inode.c3
-rw-r--r--fs/ext3/ioctl.c6
-rw-r--r--fs/ext4/acl.c2
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/ext4_jbd2.h7
-rw-r--r--fs/ext4/extents.c213
-rw-r--r--fs/ext4/fsync.c14
-rw-r--r--fs/ext4/ialloc.c8
-rw-r--r--fs/ext4/inode.c414
-rw-r--r--fs/ext4/ioctl.c15
-rw-r--r--fs/ext4/mballoc.c34
-rw-r--r--fs/ext4/mballoc.h2
-rw-r--r--fs/ext4/migrate.c10
-rw-r--r--fs/ext4/namei.c13
-rw-r--r--fs/ext4/page-io.c16
-rw-r--r--fs/ext4/resize.c12
-rw-r--r--fs/ext4/super.c48
-rw-r--r--fs/ext4/xattr.c4
-rw-r--r--fs/fat/inode.c1
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/fifo.c3
-rw-r--r--fs/freevxfs/vxfs_subr.c1
-rw-r--r--fs/fs-writeback.c141
-rw-r--r--fs/fuse/cuse.c12
-rw-r--r--fs/fuse/dev.c27
-rw-r--r--fs/fuse/dir.c38
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/generic_acl.c2
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/lops.c12
-rw-r--r--fs/gfs2/meta_io.c3
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/ioctl.c2
-rw-r--r--fs/hpfs/file.c1
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/inode.c678
-rw-r--r--fs/internal.h8
-rw-r--r--fs/ioctl.c21
-rw-r--r--fs/isofs/inode.c1
-rw-r--r--fs/jbd/commit.c22
-rw-r--r--fs/jbd2/commit.c22
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/compr_zlib.c7
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/Makefile2
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/jfs/ioctl.c2
-rw-r--r--fs/jfs/jfs_metapage.c1
-rw-r--r--fs/jfs/xattr.c2
-rw-r--r--fs/locks.c12
-rw-r--r--fs/logfs/compr.c2
-rw-r--r--fs/logfs/dev_bdev.c2
-rw-r--r--fs/logfs/file.c2
-rw-r--r--fs/logfs/inode.c2
-rw-r--r--fs/minix/Kconfig8
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/minix/minix.h74
-rw-r--r--fs/mpage.c8
-rw-r--r--fs/namei.c53
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/ncpfs/Makefile2
-rw-r--r--fs/nfs/dir.c89
-rw-r--r--fs/nfs/file.c3
-rw-r--r--fs/nfs/getroot.c4
-rw-r--r--fs/nfs/inode.c10
-rw-r--r--fs/nfs/internal.h27
-rw-r--r--fs/nfs/namespace.c121
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4_fs.h5
-rw-r--r--fs/nfs/nfs4filelayout.c352
-rw-r--r--fs/nfs/nfs4filelayout.h2
-rw-r--r--fs/nfs/nfs4filelayoutdev.c178
-rw-r--r--fs/nfs/nfs4proc.c302
-rw-r--r--fs/nfs/nfs4state.c3
-rw-r--r--fs/nfs/nfs4xdr.c313
-rw-r--r--fs/nfs/pagelist.c12
-rw-r--r--fs/nfs/pnfs.c142
-rw-r--r--fs/nfs/pnfs.h83
-rw-r--r--fs/nfs/proc.c2
-rw-r--r--fs/nfs/write.c227
-rw-r--r--fs/nfs_common/nfsacl.c1
-rw-r--r--fs/nfsd/export.c1
-rw-r--r--fs/nfsd/nfs4idmap.c1
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c160
-rw-r--r--fs/nfsd/nfs4xdr.c5
-rw-r--r--fs/nfsd/nfsctl.c35
-rw-r--r--fs/nfsd/state.h12
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/nilfs2/alloc.c12
-rw-r--r--fs/nilfs2/alloc.h2
-rw-r--r--fs/nilfs2/bmap.c12
-rw-r--r--fs/nilfs2/bmap.h3
-rw-r--r--fs/nilfs2/btnode.c7
-rw-r--r--fs/nilfs2/btree.c6
-rw-r--r--fs/nilfs2/dir.c5
-rw-r--r--fs/nilfs2/direct.c4
-rw-r--r--fs/nilfs2/file.c4
-rw-r--r--fs/nilfs2/gcinode.c1
-rw-r--r--fs/nilfs2/inode.c84
-rw-r--r--fs/nilfs2/ioctl.c115
-rw-r--r--fs/nilfs2/mdt.c9
-rw-r--r--fs/nilfs2/mdt.h2
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/nilfs2/nilfs.h33
-rw-r--r--fs/nilfs2/page.c5
-rw-r--r--fs/nilfs2/page.h3
-rw-r--r--fs/nilfs2/recovery.c32
-rw-r--r--fs/nilfs2/sb.h85
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/nilfs2/segment.c258
-rw-r--r--fs/nilfs2/segment.h14
-rw-r--r--fs/nilfs2/super.c214
-rw-r--r--fs/nilfs2/the_nilfs.c44
-rw-r--r--fs/nilfs2/the_nilfs.h51
-rw-r--r--fs/notify/inode_mark.c42
-rw-r--r--fs/notify/mark.c1
-rw-r--r--fs/notify/vfsmount_mark.c1
-rw-r--r--fs/ntfs/Makefile19
-rw-r--r--fs/ntfs/aops.c4
-rw-r--r--fs/ntfs/compress.c3
-rw-r--r--fs/ntfs/inode.c4
-rw-r--r--fs/ocfs2/Makefile4
-rw-r--r--fs/ocfs2/acl.c3
-rw-r--r--fs/ocfs2/alloc.c214
-rw-r--r--fs/ocfs2/aops.c83
-rw-r--r--fs/ocfs2/buffer_head_io.c49
-rw-r--r--fs/ocfs2/cluster/heartbeat.c7
-rw-r--r--fs/ocfs2/cluster/masklog.c20
-rw-r--r--fs/ocfs2/cluster/masklog.h105
-rw-r--r--fs/ocfs2/cluster/tcp.c10
-rw-r--r--fs/ocfs2/dcache.c45
-rw-r--r--fs/ocfs2/dir.c121
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c6
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c36
-rw-r--r--fs/ocfs2/dlm/dlmlock.c10
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c6
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c9
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/dlmglue.c246
-rw-r--r--fs/ocfs2/export.c47
-rw-r--r--fs/ocfs2/extent_map.c10
-rw-r--r--fs/ocfs2/file.c220
-rw-r--r--fs/ocfs2/heartbeat.c4
-rw-r--r--fs/ocfs2/inode.c134
-rw-r--r--fs/ocfs2/ioctl.c43
-rw-r--r--fs/ocfs2/journal.c168
-rw-r--r--fs/ocfs2/localalloc.c109
-rw-r--r--fs/ocfs2/locks.c1
-rw-r--r--fs/ocfs2/mmap.c7
-rw-r--r--fs/ocfs2/namei.c175
-rw-r--r--fs/ocfs2/ocfs2.h33
-rw-r--r--fs/ocfs2/ocfs2_trace.h2739
-rw-r--r--fs/ocfs2/quota_global.c45
-rw-r--r--fs/ocfs2/quota_local.c16
-rw-r--r--fs/ocfs2/refcounttree.c158
-rw-r--r--fs/ocfs2/reservations.c57
-rw-r--r--fs/ocfs2/resize.c23
-rw-r--r--fs/ocfs2/slot_map.c16
-rw-r--r--fs/ocfs2/suballoc.c189
-rw-r--r--fs/ocfs2/super.c89
-rw-r--r--fs/ocfs2/symlink.c14
-rw-r--r--fs/ocfs2/sysfile.c1
-rw-r--r--fs/ocfs2/uptodate.c73
-rw-r--r--fs/ocfs2/xattr.c155
-rw-r--r--fs/omfs/file.c1
-rw-r--r--fs/open.c13
-rw-r--r--fs/partitions/check.c3
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c178
-rw-r--r--fs/proc/generic.c8
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/root.c32
-rw-r--r--fs/proc/task_mmu.c138
-rw-r--r--fs/proc/task_nommu.c6
-rw-r--r--fs/pstore/inode.c76
-rw-r--r--fs/pstore/internal.h3
-rw-r--r--fs/pstore/platform.c31
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/quota/dquot.c41
-rw-r--r--fs/reiserfs/Makefile4
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/select.c3
-rw-r--r--fs/squashfs/Kconfig12
-rw-r--r--fs/squashfs/decompressor.c34
-rw-r--r--fs/squashfs/decompressor.h7
-rw-r--r--fs/squashfs/dir.c9
-rw-r--r--fs/squashfs/lzo_wrapper.c4
-rw-r--r--fs/squashfs/namei.c12
-rw-r--r--fs/squashfs/squashfs.h1
-rw-r--r--fs/squashfs/squashfs_fs.h4
-rw-r--r--fs/squashfs/super.c15
-rw-r--r--fs/squashfs/xz_wrapper.c53
-rw-r--r--fs/squashfs/zlib_wrapper.c10
-rw-r--r--fs/super.c2
-rw-r--r--fs/sync.c28
-rw-r--r--fs/sysv/itree.c1
-rw-r--r--fs/ubifs/Kconfig9
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--fs/ubifs/file.c11
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/lprops.c2
-rw-r--r--fs/ubifs/lpt_commit.c4
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/super.c1
-rw-r--r--fs/udf/balloc.c9
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/ufs/truncate.c2
-rw-r--r--fs/ufs/util.h2
-rw-r--r--fs/utimes.c2
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/Makefile12
-rw-r--r--fs/xfs/linux-2.6/kmem.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c374
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h40
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h23
-rw-r--r--fs/xfs/linux-2.6/xfs_message.c133
-rw-r--r--fs/xfs/linux-2.6/xfs_message.h38
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c164
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c35
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c2
-rw-r--r--fs/xfs/quota/xfs_dquot.c48
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c5
-rw-r--r--fs/xfs/quota/xfs_qm.c49
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c3
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c85
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c5
-rw-r--r--fs/xfs/support/debug.c107
-rw-r--r--fs/xfs/support/debug.h61
-rw-r--r--fs/xfs/xfs_alloc.c158
-rw-r--r--fs/xfs/xfs_bmap.c24
-rw-r--r--fs/xfs/xfs_buf_item.c15
-rw-r--r--fs/xfs/xfs_da_btree.c9
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_dir2_node.c25
-rw-r--r--fs/xfs/xfs_error.c22
-rw-r--r--fs/xfs/xfs_error.h19
-rw-r--r--fs/xfs/xfs_fsops.c6
-rw-r--r--fs/xfs/xfs_ialloc.c82
-rw-r--r--fs/xfs/xfs_inode.c131
-rw-r--r--fs/xfs/xfs_inode.h23
-rw-r--r--fs/xfs/xfs_inode_item.c6
-rw-r--r--fs/xfs/xfs_iomap.c12
-rw-r--r--fs/xfs/xfs_log.c124
-rw-r--r--fs/xfs/xfs_log_priv.h4
-rw-r--r--fs/xfs/xfs_log_recover.c223
-rw-r--r--fs/xfs/xfs_mount.c148
-rw-r--r--fs/xfs/xfs_quota.h3
-rw-r--r--fs/xfs/xfs_rtalloc.c92
-rw-r--r--fs/xfs/xfs_rtalloc.h2
-rw-r--r--fs/xfs/xfs_rw.c58
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_trans_buf.c9
-rw-r--r--fs/xfs/xfs_trans_inode.c22
-rw-r--r--fs/xfs/xfs_vnodeops.c77
-rw-r--r--fs/xfs/xfs_vnodeops.h1
370 files changed, 11446 insertions, 6793 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 515455296378..535ab6eccb1a 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -262,7 +262,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
262 if (strcmp(name, "") != 0) 262 if (strcmp(name, "") != 0)
263 return -EINVAL; 263 return -EINVAL;
264 264
265 v9ses = v9fs_inode2v9ses(dentry->d_inode); 265 v9ses = v9fs_dentry2v9ses(dentry);
266 /* 266 /*
267 * We allow set/get/list of acl when access=client is not specified 267 * We allow set/get/list of acl when access=client is not specified
268 */ 268 */
@@ -312,7 +312,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
312 if (strcmp(name, "") != 0) 312 if (strcmp(name, "") != 0)
313 return -EINVAL; 313 return -EINVAL;
314 314
315 v9ses = v9fs_inode2v9ses(dentry->d_inode); 315 v9ses = v9fs_dentry2v9ses(dentry);
316 /* 316 /*
317 * set the attribute on the remote. Without even looking at the 317 * set the attribute on the remote. Without even looking at the
318 * xattr value. We leave it to the server to validate 318 * xattr value. We leave it to the server to validate
@@ -323,7 +323,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
323 323
324 if (S_ISLNK(inode->i_mode)) 324 if (S_ISLNK(inode->i_mode))
325 return -EOPNOTSUPP; 325 return -EOPNOTSUPP;
326 if (!is_owner_or_cap(inode)) 326 if (!inode_owner_or_capable(inode))
327 return -EPERM; 327 return -EPERM;
328 if (value) { 328 if (value) {
329 /* update the cached acl value */ 329 /* update the cached acl value */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index cd63e002d826..0ee594569dcc 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -134,7 +134,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
134 struct v9fs_session_info *v9ses; 134 struct v9fs_session_info *v9ses;
135 struct p9_fid *fid, *old_fid = NULL; 135 struct p9_fid *fid, *old_fid = NULL;
136 136
137 v9ses = v9fs_inode2v9ses(dentry->d_inode); 137 v9ses = v9fs_dentry2v9ses(dentry);
138 access = v9ses->flags & V9FS_ACCESS_MASK; 138 access = v9ses->flags & V9FS_ACCESS_MASK;
139 fid = v9fs_fid_find(dentry, uid, any); 139 fid = v9fs_fid_find(dentry, uid, any);
140 if (fid) 140 if (fid)
@@ -237,7 +237,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
237 int any, access; 237 int any, access;
238 struct v9fs_session_info *v9ses; 238 struct v9fs_session_info *v9ses;
239 239
240 v9ses = v9fs_inode2v9ses(dentry->d_inode); 240 v9ses = v9fs_dentry2v9ses(dentry);
241 access = v9ses->flags & V9FS_ACCESS_MASK; 241 access = v9ses->flags & V9FS_ACCESS_MASK;
242 switch (access) { 242 switch (access) {
243 case V9FS_ACCESS_SINGLE: 243 case V9FS_ACCESS_SINGLE:
@@ -286,9 +286,11 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
286 286
287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) 287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
288{ 288{
289 int err; 289 int err, flags;
290 struct p9_fid *fid; 290 struct p9_fid *fid;
291 struct v9fs_session_info *v9ses;
291 292
293 v9ses = v9fs_dentry2v9ses(dentry);
292 fid = v9fs_fid_clone_with_uid(dentry, 0); 294 fid = v9fs_fid_clone_with_uid(dentry, 0);
293 if (IS_ERR(fid)) 295 if (IS_ERR(fid))
294 goto error_out; 296 goto error_out;
@@ -297,8 +299,17 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
297 * dirty pages. We always request for the open fid in read-write 299 * dirty pages. We always request for the open fid in read-write
298 * mode so that a partial page write which result in page 300 * mode so that a partial page write which result in page
299 * read can work. 301 * read can work.
302 *
303 * we don't have a tsyncfs operation for older version
304 * of protocol. So make sure the write back fid is
305 * opened in O_SYNC mode.
300 */ 306 */
301 err = p9_client_open(fid, O_RDWR); 307 if (!v9fs_proto_dotl(v9ses))
308 flags = O_RDWR | O_SYNC;
309 else
310 flags = O_RDWR;
311
312 err = p9_client_open(fid, flags);
302 if (err < 0) { 313 if (err < 0) {
303 p9_client_clunk(fid); 314 p9_client_clunk(fid);
304 fid = ERR_PTR(err); 315 fid = ERR_PTR(err);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index bd8496db135b..9665c2b840e6 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -130,6 +130,7 @@ struct v9fs_inode {
130#endif 130#endif
131 unsigned int cache_validity; 131 unsigned int cache_validity;
132 struct p9_fid *writeback_fid; 132 struct p9_fid *writeback_fid;
133 struct mutex v_mutex;
133 struct inode vfs_inode; 134 struct inode vfs_inode;
134}; 135};
135 136
@@ -173,6 +174,11 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
173 return (inode->i_sb->s_fs_info); 174 return (inode->i_sb->s_fs_info);
174} 175}
175 176
177static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry)
178{
179 return dentry->d_sb->s_fs_info;
180}
181
176static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) 182static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses)
177{ 183{
178 return v9ses->flags & V9FS_PROTO_2000U; 184 return v9ses->flags & V9FS_PROTO_2000U;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 78bcb97c3425..ffed55817f0c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -90,7 +90,9 @@ int v9fs_file_open(struct inode *inode, struct file *file)
90 } 90 }
91 91
92 file->private_data = fid; 92 file->private_data = fid;
93 if (v9ses->cache && !v9inode->writeback_fid) { 93 mutex_lock(&v9inode->v_mutex);
94 if (v9ses->cache && !v9inode->writeback_fid &&
95 ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
94 /* 96 /*
95 * clone a fid and add it to writeback_fid 97 * clone a fid and add it to writeback_fid
96 * we do it during open time instead of 98 * we do it during open time instead of
@@ -101,10 +103,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
101 fid = v9fs_writeback_fid(file->f_path.dentry); 103 fid = v9fs_writeback_fid(file->f_path.dentry);
102 if (IS_ERR(fid)) { 104 if (IS_ERR(fid)) {
103 err = PTR_ERR(fid); 105 err = PTR_ERR(fid);
106 mutex_unlock(&v9inode->v_mutex);
104 goto out_error; 107 goto out_error;
105 } 108 }
106 v9inode->writeback_fid = (void *) fid; 109 v9inode->writeback_fid = (void *) fid;
107 } 110 }
111 mutex_unlock(&v9inode->v_mutex);
108#ifdef CONFIG_9P_FSCACHE 112#ifdef CONFIG_9P_FSCACHE
109 if (v9ses->cache) 113 if (v9ses->cache)
110 v9fs_cache_inode_set_cookie(inode, file); 114 v9fs_cache_inode_set_cookie(inode, file);
@@ -504,9 +508,12 @@ v9fs_file_write(struct file *filp, const char __user * data,
504 if (!count) 508 if (!count)
505 goto out; 509 goto out;
506 510
507 return v9fs_file_write_internal(filp->f_path.dentry->d_inode, 511 retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode,
508 filp->private_data, 512 filp->private_data,
509 data, count, offset, 1); 513 data, count, &origin, 1);
514 /* update offset on successful write */
515 if (retval > 0)
516 *offset = origin;
510out: 517out:
511 return retval; 518 return retval;
512} 519}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8a2c232f708a..7f6c67703195 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -221,6 +221,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
221#endif 221#endif
222 v9inode->writeback_fid = NULL; 222 v9inode->writeback_fid = NULL;
223 v9inode->cache_validity = 0; 223 v9inode->cache_validity = 0;
224 mutex_init(&v9inode->v_mutex);
224 return &v9inode->vfs_inode; 225 return &v9inode->vfs_inode;
225} 226}
226 227
@@ -650,7 +651,9 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
650 /* if we are opening a file, assign the open fid to the file */ 651 /* if we are opening a file, assign the open fid to the file */
651 if (nd && nd->flags & LOOKUP_OPEN) { 652 if (nd && nd->flags & LOOKUP_OPEN) {
652 v9inode = V9FS_I(dentry->d_inode); 653 v9inode = V9FS_I(dentry->d_inode);
653 if (v9ses->cache && !v9inode->writeback_fid) { 654 mutex_lock(&v9inode->v_mutex);
655 if (v9ses->cache && !v9inode->writeback_fid &&
656 ((flags & O_ACCMODE) != O_RDONLY)) {
654 /* 657 /*
655 * clone a fid and add it to writeback_fid 658 * clone a fid and add it to writeback_fid
656 * we do it during open time instead of 659 * we do it during open time instead of
@@ -661,10 +664,12 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
661 inode_fid = v9fs_writeback_fid(dentry); 664 inode_fid = v9fs_writeback_fid(dentry);
662 if (IS_ERR(inode_fid)) { 665 if (IS_ERR(inode_fid)) {
663 err = PTR_ERR(inode_fid); 666 err = PTR_ERR(inode_fid);
667 mutex_unlock(&v9inode->v_mutex);
664 goto error; 668 goto error;
665 } 669 }
666 v9inode->writeback_fid = (void *) inode_fid; 670 v9inode->writeback_fid = (void *) inode_fid;
667 } 671 }
672 mutex_unlock(&v9inode->v_mutex);
668 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 673 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
669 if (IS_ERR(filp)) { 674 if (IS_ERR(filp)) {
670 err = PTR_ERR(filp); 675 err = PTR_ERR(filp);
@@ -931,7 +936,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
931 936
932 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 937 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
933 err = -EPERM; 938 err = -EPERM;
934 v9ses = v9fs_inode2v9ses(dentry->d_inode); 939 v9ses = v9fs_dentry2v9ses(dentry);
935 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 940 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
936 generic_fillattr(dentry->d_inode, stat); 941 generic_fillattr(dentry->d_inode, stat);
937 return 0; 942 return 0;
@@ -967,8 +972,12 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
967 struct p9_wstat wstat; 972 struct p9_wstat wstat;
968 973
969 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 974 P9_DPRINTK(P9_DEBUG_VFS, "\n");
975 retval = inode_change_ok(dentry->d_inode, iattr);
976 if (retval)
977 return retval;
978
970 retval = -EPERM; 979 retval = -EPERM;
971 v9ses = v9fs_inode2v9ses(dentry->d_inode); 980 v9ses = v9fs_dentry2v9ses(dentry);
972 fid = v9fs_fid_lookup(dentry); 981 fid = v9fs_fid_lookup(dentry);
973 if(IS_ERR(fid)) 982 if(IS_ERR(fid))
974 return PTR_ERR(fid); 983 return PTR_ERR(fid);
@@ -993,12 +1002,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
993 if (iattr->ia_valid & ATTR_GID) 1002 if (iattr->ia_valid & ATTR_GID)
994 wstat.n_gid = iattr->ia_gid; 1003 wstat.n_gid = iattr->ia_gid;
995 } 1004 }
996 if ((iattr->ia_valid & ATTR_SIZE) && 1005
997 iattr->ia_size != i_size_read(dentry->d_inode)) {
998 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
999 if (retval)
1000 return retval;
1001 }
1002 /* Write all dirty data */ 1006 /* Write all dirty data */
1003 if (S_ISREG(dentry->d_inode->i_mode)) 1007 if (S_ISREG(dentry->d_inode->i_mode))
1004 filemap_write_and_wait(dentry->d_inode->i_mapping); 1008 filemap_write_and_wait(dentry->d_inode->i_mapping);
@@ -1006,6 +1010,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
1006 retval = p9_client_wstat(fid, &wstat); 1010 retval = p9_client_wstat(fid, &wstat);
1007 if (retval < 0) 1011 if (retval < 0)
1008 return retval; 1012 return retval;
1013
1014 if ((iattr->ia_valid & ATTR_SIZE) &&
1015 iattr->ia_size != i_size_read(dentry->d_inode))
1016 truncate_setsize(dentry->d_inode, iattr->ia_size);
1017
1009 v9fs_invalidate_inode_attr(dentry->d_inode); 1018 v9fs_invalidate_inode_attr(dentry->d_inode);
1010 1019
1011 setattr_copy(dentry->d_inode, iattr); 1020 setattr_copy(dentry->d_inode, iattr);
@@ -1130,7 +1139,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1130 1139
1131 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); 1140 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
1132 retval = -EPERM; 1141 retval = -EPERM;
1133 v9ses = v9fs_inode2v9ses(dentry->d_inode); 1142 v9ses = v9fs_dentry2v9ses(dentry);
1134 fid = v9fs_fid_lookup(dentry); 1143 fid = v9fs_fid_lookup(dentry);
1135 if (IS_ERR(fid)) 1144 if (IS_ERR(fid))
1136 return PTR_ERR(fid); 1145 return PTR_ERR(fid);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 67c138e94feb..ffbb113d5f33 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -245,7 +245,9 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
245 v9fs_set_create_acl(dentry, dacl, pacl); 245 v9fs_set_create_acl(dentry, dacl, pacl);
246 246
247 v9inode = V9FS_I(inode); 247 v9inode = V9FS_I(inode);
248 if (v9ses->cache && !v9inode->writeback_fid) { 248 mutex_lock(&v9inode->v_mutex);
249 if (v9ses->cache && !v9inode->writeback_fid &&
250 ((flags & O_ACCMODE) != O_RDONLY)) {
249 /* 251 /*
250 * clone a fid and add it to writeback_fid 252 * clone a fid and add it to writeback_fid
251 * we do it during open time instead of 253 * we do it during open time instead of
@@ -256,10 +258,12 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
256 inode_fid = v9fs_writeback_fid(dentry); 258 inode_fid = v9fs_writeback_fid(dentry);
257 if (IS_ERR(inode_fid)) { 259 if (IS_ERR(inode_fid)) {
258 err = PTR_ERR(inode_fid); 260 err = PTR_ERR(inode_fid);
261 mutex_unlock(&v9inode->v_mutex);
259 goto error; 262 goto error;
260 } 263 }
261 v9inode->writeback_fid = (void *) inode_fid; 264 v9inode->writeback_fid = (void *) inode_fid;
262 } 265 }
266 mutex_unlock(&v9inode->v_mutex);
263 /* Since we are opening a file, assign the open fid to the file */ 267 /* Since we are opening a file, assign the open fid to the file */
264 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 268 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
265 if (IS_ERR(filp)) { 269 if (IS_ERR(filp)) {
@@ -391,7 +395,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
391 395
392 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 396 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
393 err = -EPERM; 397 err = -EPERM;
394 v9ses = v9fs_inode2v9ses(dentry->d_inode); 398 v9ses = v9fs_dentry2v9ses(dentry);
395 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 399 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
396 generic_fillattr(dentry->d_inode, stat); 400 generic_fillattr(dentry->d_inode, stat);
397 return 0; 401 return 0;
@@ -448,17 +452,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
448 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; 452 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
449 453
450 retval = -EPERM; 454 retval = -EPERM;
451 v9ses = v9fs_inode2v9ses(dentry->d_inode); 455 v9ses = v9fs_dentry2v9ses(dentry);
452 fid = v9fs_fid_lookup(dentry); 456 fid = v9fs_fid_lookup(dentry);
453 if (IS_ERR(fid)) 457 if (IS_ERR(fid))
454 return PTR_ERR(fid); 458 return PTR_ERR(fid);
455 459
456 if ((iattr->ia_valid & ATTR_SIZE) &&
457 iattr->ia_size != i_size_read(dentry->d_inode)) {
458 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
459 if (retval)
460 return retval;
461 }
462 /* Write all dirty data */ 460 /* Write all dirty data */
463 if (S_ISREG(dentry->d_inode->i_mode)) 461 if (S_ISREG(dentry->d_inode->i_mode))
464 filemap_write_and_wait(dentry->d_inode->i_mapping); 462 filemap_write_and_wait(dentry->d_inode->i_mapping);
@@ -466,8 +464,12 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
466 retval = p9_client_setattr(fid, &p9attr); 464 retval = p9_client_setattr(fid, &p9attr);
467 if (retval < 0) 465 if (retval < 0)
468 return retval; 466 return retval;
469 v9fs_invalidate_inode_attr(dentry->d_inode);
470 467
468 if ((iattr->ia_valid & ATTR_SIZE) &&
469 iattr->ia_size != i_size_read(dentry->d_inode))
470 truncate_setsize(dentry->d_inode, iattr->ia_size);
471
472 v9fs_invalidate_inode_attr(dentry->d_inode);
471 setattr_copy(dentry->d_inode, iattr); 473 setattr_copy(dentry->d_inode, iattr);
472 mark_inode_dirty(dentry->d_inode); 474 mark_inode_dirty(dentry->d_inode);
473 if (iattr->ia_valid & ATTR_MODE) { 475 if (iattr->ia_valid & ATTR_MODE) {
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 09fd08d1606f..f3eed3383e4f 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -262,7 +262,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
262 goto done; 262 goto done;
263 } 263 }
264 264
265 v9ses = v9fs_inode2v9ses(dentry->d_inode); 265 v9ses = v9fs_dentry2v9ses(dentry);
266 if (v9fs_proto_dotl(v9ses)) { 266 if (v9fs_proto_dotl(v9ses)) {
267 res = p9_client_statfs(fid, &rs); 267 res = p9_client_statfs(fid, &rs);
268 if (res == 0) { 268 if (res == 0) {
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 2ff622f6f547..718ac1f440c6 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -50,6 +50,7 @@ struct adfs_sb_info {
50 gid_t s_gid; /* owner gid */ 50 gid_t s_gid; /* owner gid */
51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ 51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
52 umode_t s_other_mask; /* ADFS other perm -> unix perm */ 52 umode_t s_other_mask; /* ADFS other perm -> unix perm */
53 int s_ftsuffix; /* ,xyz hex filetype suffix option */
53 54
54 __u32 s_ids_per_zone; /* max. no ids in one zone */ 55 __u32 s_ids_per_zone; /* max. no ids in one zone */
55 __u32 s_idlen; /* length of ID in map */ 56 __u32 s_idlen; /* length of ID in map */
@@ -79,6 +80,10 @@ struct adfs_dir {
79 80
80 int nr_buffers; 81 int nr_buffers;
81 struct buffer_head *bh[4]; 82 struct buffer_head *bh[4];
83
84 /* big directories need allocated buffers */
85 struct buffer_head **bh_fplus;
86
82 unsigned int pos; 87 unsigned int pos;
83 unsigned int parent_id; 88 unsigned int parent_id;
84 89
@@ -89,7 +94,7 @@ struct adfs_dir {
89/* 94/*
90 * This is the overall maximum name length 95 * This is the overall maximum name length
91 */ 96 */
92#define ADFS_MAX_NAME_LEN 256 97#define ADFS_MAX_NAME_LEN (256 + 4) /* +4 for ,xyz hex filetype suffix */
93struct object_info { 98struct object_info {
94 __u32 parent_id; /* parent object id */ 99 __u32 parent_id; /* parent object id */
95 __u32 file_id; /* object id */ 100 __u32 file_id; /* object id */
@@ -97,10 +102,26 @@ struct object_info {
97 __u32 execaddr; /* execution address */ 102 __u32 execaddr; /* execution address */
98 __u32 size; /* size */ 103 __u32 size; /* size */
99 __u8 attr; /* RISC OS attributes */ 104 __u8 attr; /* RISC OS attributes */
100 unsigned char name_len; /* name length */ 105 unsigned int name_len; /* name length */
101 char name[ADFS_MAX_NAME_LEN];/* file name */ 106 char name[ADFS_MAX_NAME_LEN];/* file name */
107
108 /* RISC OS file type (12-bit: derived from loadaddr) */
109 __u16 filetype;
102}; 110};
103 111
112/* RISC OS 12-bit filetype converts to ,xyz hex filename suffix */
113static inline int append_filetype_suffix(char *buf, __u16 filetype)
114{
115 if (filetype == 0xffff) /* no explicit 12-bit file type was set */
116 return 0;
117
118 *buf++ = ',';
119 *buf++ = hex_asc_lo(filetype >> 8);
120 *buf++ = hex_asc_lo(filetype >> 4);
121 *buf++ = hex_asc_lo(filetype >> 0);
122 return 4;
123}
124
104struct adfs_dir_ops { 125struct adfs_dir_ops {
105 int (*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir); 126 int (*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir);
106 int (*setpos)(struct adfs_dir *dir, unsigned int fpos); 127 int (*setpos)(struct adfs_dir *dir, unsigned int fpos);
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index bafc71222e25..4bbe853ee50a 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -52,7 +52,6 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen)
52 *buf++ = *ptr; 52 *buf++ = *ptr;
53 ptr++; 53 ptr++;
54 } 54 }
55 *buf = '\0';
56 55
57 return buf - old_buf; 56 return buf - old_buf;
58} 57}
@@ -208,7 +207,8 @@ release_buffers:
208 * convert a disk-based directory entry to a Linux ADFS directory entry 207 * convert a disk-based directory entry to a Linux ADFS directory entry
209 */ 208 */
210static inline void 209static inline void
211adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de) 210adfs_dir2obj(struct adfs_dir *dir, struct object_info *obj,
211 struct adfs_direntry *de)
212{ 212{
213 obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN); 213 obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN);
214 obj->file_id = adfs_readval(de->dirinddiscadd, 3); 214 obj->file_id = adfs_readval(de->dirinddiscadd, 3);
@@ -216,6 +216,23 @@ adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de)
216 obj->execaddr = adfs_readval(de->direxec, 4); 216 obj->execaddr = adfs_readval(de->direxec, 4);
217 obj->size = adfs_readval(de->dirlen, 4); 217 obj->size = adfs_readval(de->dirlen, 4);
218 obj->attr = de->newdiratts; 218 obj->attr = de->newdiratts;
219 obj->filetype = -1;
220
221 /*
222 * object is a file and is filetyped and timestamped?
223 * RISC OS 12-bit filetype is stored in load_address[19:8]
224 */
225 if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) &&
226 (0xfff00000 == (0xfff00000 & obj->loadaddr))) {
227 obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8);
228
229 /* optionally append the ,xyz hex filetype suffix */
230 if (ADFS_SB(dir->sb)->s_ftsuffix)
231 obj->name_len +=
232 append_filetype_suffix(
233 &obj->name[obj->name_len],
234 obj->filetype);
235 }
219} 236}
220 237
221/* 238/*
@@ -260,7 +277,7 @@ __adfs_dir_get(struct adfs_dir *dir, int pos, struct object_info *obj)
260 if (!de.dirobname[0]) 277 if (!de.dirobname[0])
261 return -ENOENT; 278 return -ENOENT;
262 279
263 adfs_dir2obj(obj, &de); 280 adfs_dir2obj(dir, obj, &de);
264 281
265 return 0; 282 return 0;
266} 283}
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 1796bb352d05..d9e3bee4e653 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -8,6 +8,7 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/slab.h>
11#include "adfs.h" 12#include "adfs.h"
12#include "dir_fplus.h" 13#include "dir_fplus.h"
13 14
@@ -22,30 +23,53 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
22 23
23 dir->nr_buffers = 0; 24 dir->nr_buffers = 0;
24 25
26 /* start off using fixed bh set - only alloc for big dirs */
27 dir->bh_fplus = &dir->bh[0];
28
25 block = __adfs_block_map(sb, id, 0); 29 block = __adfs_block_map(sb, id, 0);
26 if (!block) { 30 if (!block) {
27 adfs_error(sb, "dir object %X has a hole at offset 0", id); 31 adfs_error(sb, "dir object %X has a hole at offset 0", id);
28 goto out; 32 goto out;
29 } 33 }
30 34
31 dir->bh[0] = sb_bread(sb, block); 35 dir->bh_fplus[0] = sb_bread(sb, block);
32 if (!dir->bh[0]) 36 if (!dir->bh_fplus[0])
33 goto out; 37 goto out;
34 dir->nr_buffers += 1; 38 dir->nr_buffers += 1;
35 39
36 h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; 40 h = (struct adfs_bigdirheader *)dir->bh_fplus[0]->b_data;
37 size = le32_to_cpu(h->bigdirsize); 41 size = le32_to_cpu(h->bigdirsize);
38 if (size != sz) { 42 if (size != sz) {
39 printk(KERN_WARNING "adfs: adfs_fplus_read: directory header size\n" 43 printk(KERN_WARNING "adfs: adfs_fplus_read:"
40 " does not match directory size\n"); 44 " directory header size %X\n"
45 " does not match directory size %X\n",
46 size, sz);
41 } 47 }
42 48
43 if (h->bigdirversion[0] != 0 || h->bigdirversion[1] != 0 || 49 if (h->bigdirversion[0] != 0 || h->bigdirversion[1] != 0 ||
44 h->bigdirversion[2] != 0 || size & 2047 || 50 h->bigdirversion[2] != 0 || size & 2047 ||
45 h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME)) 51 h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME)) {
52 printk(KERN_WARNING "adfs: dir object %X has"
53 " malformed dir header\n", id);
46 goto out; 54 goto out;
55 }
47 56
48 size >>= sb->s_blocksize_bits; 57 size >>= sb->s_blocksize_bits;
58 if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
59 /* this directory is too big for fixed bh set, must allocate */
60 struct buffer_head **bh_fplus =
61 kzalloc(size * sizeof(struct buffer_head *),
62 GFP_KERNEL);
63 if (!bh_fplus) {
64 adfs_error(sb, "not enough memory for"
65 " dir object %X (%d blocks)", id, size);
66 goto out;
67 }
68 dir->bh_fplus = bh_fplus;
69 /* copy over the pointer to the block that we've already read */
70 dir->bh_fplus[0] = dir->bh[0];
71 }
72
49 for (blk = 1; blk < size; blk++) { 73 for (blk = 1; blk < size; blk++) {
50 block = __adfs_block_map(sb, id, blk); 74 block = __adfs_block_map(sb, id, blk);
51 if (!block) { 75 if (!block) {
@@ -53,25 +77,44 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
53 goto out; 77 goto out;
54 } 78 }
55 79
56 dir->bh[blk] = sb_bread(sb, block); 80 dir->bh_fplus[blk] = sb_bread(sb, block);
57 if (!dir->bh[blk]) 81 if (!dir->bh_fplus[blk]) {
82 adfs_error(sb, "dir object %X failed read for"
83 " offset %d, mapped block %X",
84 id, blk, block);
58 goto out; 85 goto out;
59 dir->nr_buffers = blk; 86 }
87
88 dir->nr_buffers += 1;
60 } 89 }
61 90
62 t = (struct adfs_bigdirtail *)(dir->bh[size - 1]->b_data + (sb->s_blocksize - 8)); 91 t = (struct adfs_bigdirtail *)
92 (dir->bh_fplus[size - 1]->b_data + (sb->s_blocksize - 8));
63 93
64 if (t->bigdirendname != cpu_to_le32(BIGDIRENDNAME) || 94 if (t->bigdirendname != cpu_to_le32(BIGDIRENDNAME) ||
65 t->bigdirendmasseq != h->startmasseq || 95 t->bigdirendmasseq != h->startmasseq ||
66 t->reserved[0] != 0 || t->reserved[1] != 0) 96 t->reserved[0] != 0 || t->reserved[1] != 0) {
97 printk(KERN_WARNING "adfs: dir object %X has "
98 "malformed dir end\n", id);
67 goto out; 99 goto out;
100 }
68 101
69 dir->parent_id = le32_to_cpu(h->bigdirparent); 102 dir->parent_id = le32_to_cpu(h->bigdirparent);
70 dir->sb = sb; 103 dir->sb = sb;
71 return 0; 104 return 0;
105
72out: 106out:
73 for (i = 0; i < dir->nr_buffers; i++) 107 if (dir->bh_fplus) {
74 brelse(dir->bh[i]); 108 for (i = 0; i < dir->nr_buffers; i++)
109 brelse(dir->bh_fplus[i]);
110
111 if (&dir->bh[0] != dir->bh_fplus)
112 kfree(dir->bh_fplus);
113
114 dir->bh_fplus = NULL;
115 }
116
117 dir->nr_buffers = 0;
75 dir->sb = NULL; 118 dir->sb = NULL;
76 return ret; 119 return ret;
77} 120}
@@ -79,7 +122,8 @@ out:
79static int 122static int
80adfs_fplus_setpos(struct adfs_dir *dir, unsigned int fpos) 123adfs_fplus_setpos(struct adfs_dir *dir, unsigned int fpos)
81{ 124{
82 struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; 125 struct adfs_bigdirheader *h =
126 (struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data;
83 int ret = -ENOENT; 127 int ret = -ENOENT;
84 128
85 if (fpos <= le32_to_cpu(h->bigdirentries)) { 129 if (fpos <= le32_to_cpu(h->bigdirentries)) {
@@ -102,21 +146,27 @@ dir_memcpy(struct adfs_dir *dir, unsigned int offset, void *to, int len)
102 partial = sb->s_blocksize - offset; 146 partial = sb->s_blocksize - offset;
103 147
104 if (partial >= len) 148 if (partial >= len)
105 memcpy(to, dir->bh[buffer]->b_data + offset, len); 149 memcpy(to, dir->bh_fplus[buffer]->b_data + offset, len);
106 else { 150 else {
107 char *c = (char *)to; 151 char *c = (char *)to;
108 152
109 remainder = len - partial; 153 remainder = len - partial;
110 154
111 memcpy(c, dir->bh[buffer]->b_data + offset, partial); 155 memcpy(c,
112 memcpy(c + partial, dir->bh[buffer + 1]->b_data, remainder); 156 dir->bh_fplus[buffer]->b_data + offset,
157 partial);
158
159 memcpy(c + partial,
160 dir->bh_fplus[buffer + 1]->b_data,
161 remainder);
113 } 162 }
114} 163}
115 164
116static int 165static int
117adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj) 166adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj)
118{ 167{
119 struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; 168 struct adfs_bigdirheader *h =
169 (struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data;
120 struct adfs_bigdirentry bde; 170 struct adfs_bigdirentry bde;
121 unsigned int offset; 171 unsigned int offset;
122 int i, ret = -ENOENT; 172 int i, ret = -ENOENT;
@@ -147,6 +197,24 @@ adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj)
147 if (obj->name[i] == '/') 197 if (obj->name[i] == '/')
148 obj->name[i] = '.'; 198 obj->name[i] = '.';
149 199
200 obj->filetype = -1;
201
202 /*
203 * object is a file and is filetyped and timestamped?
204 * RISC OS 12-bit filetype is stored in load_address[19:8]
205 */
206 if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) &&
207 (0xfff00000 == (0xfff00000 & obj->loadaddr))) {
208 obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8);
209
210 /* optionally append the ,xyz hex filetype suffix */
211 if (ADFS_SB(dir->sb)->s_ftsuffix)
212 obj->name_len +=
213 append_filetype_suffix(
214 &obj->name[obj->name_len],
215 obj->filetype);
216 }
217
150 dir->pos += 1; 218 dir->pos += 1;
151 ret = 0; 219 ret = 0;
152out: 220out:
@@ -160,7 +228,7 @@ adfs_fplus_sync(struct adfs_dir *dir)
160 int i; 228 int i;
161 229
162 for (i = dir->nr_buffers - 1; i >= 0; i--) { 230 for (i = dir->nr_buffers - 1; i >= 0; i--) {
163 struct buffer_head *bh = dir->bh[i]; 231 struct buffer_head *bh = dir->bh_fplus[i];
164 sync_dirty_buffer(bh); 232 sync_dirty_buffer(bh);
165 if (buffer_req(bh) && !buffer_uptodate(bh)) 233 if (buffer_req(bh) && !buffer_uptodate(bh))
166 err = -EIO; 234 err = -EIO;
@@ -174,8 +242,17 @@ adfs_fplus_free(struct adfs_dir *dir)
174{ 242{
175 int i; 243 int i;
176 244
177 for (i = 0; i < dir->nr_buffers; i++) 245 if (dir->bh_fplus) {
178 brelse(dir->bh[i]); 246 for (i = 0; i < dir->nr_buffers; i++)
247 brelse(dir->bh_fplus[i]);
248
249 if (&dir->bh[0] != dir->bh_fplus)
250 kfree(dir->bh_fplus);
251
252 dir->bh_fplus = NULL;
253 }
254
255 dir->nr_buffers = 0;
179 dir->sb = NULL; 256 dir->sb = NULL;
180} 257}
181 258
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 09fe40198d1c..d5250c5aae21 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -72,32 +72,18 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
72static const struct address_space_operations adfs_aops = { 72static const struct address_space_operations adfs_aops = {
73 .readpage = adfs_readpage, 73 .readpage = adfs_readpage,
74 .writepage = adfs_writepage, 74 .writepage = adfs_writepage,
75 .sync_page = block_sync_page,
76 .write_begin = adfs_write_begin, 75 .write_begin = adfs_write_begin,
77 .write_end = generic_write_end, 76 .write_end = generic_write_end,
78 .bmap = _adfs_bmap 77 .bmap = _adfs_bmap
79}; 78};
80 79
81static inline unsigned int
82adfs_filetype(struct inode *inode)
83{
84 unsigned int type;
85
86 if (ADFS_I(inode)->stamped)
87 type = (ADFS_I(inode)->loadaddr >> 8) & 0xfff;
88 else
89 type = (unsigned int) -1;
90
91 return type;
92}
93
94/* 80/*
95 * Convert ADFS attributes and filetype to Linux permission. 81 * Convert ADFS attributes and filetype to Linux permission.
96 */ 82 */
97static umode_t 83static umode_t
98adfs_atts2mode(struct super_block *sb, struct inode *inode) 84adfs_atts2mode(struct super_block *sb, struct inode *inode)
99{ 85{
100 unsigned int filetype, attr = ADFS_I(inode)->attr; 86 unsigned int attr = ADFS_I(inode)->attr;
101 umode_t mode, rmask; 87 umode_t mode, rmask;
102 struct adfs_sb_info *asb = ADFS_SB(sb); 88 struct adfs_sb_info *asb = ADFS_SB(sb);
103 89
@@ -106,9 +92,7 @@ adfs_atts2mode(struct super_block *sb, struct inode *inode)
106 return S_IFDIR | S_IXUGO | mode; 92 return S_IFDIR | S_IXUGO | mode;
107 } 93 }
108 94
109 filetype = adfs_filetype(inode); 95 switch (ADFS_I(inode)->filetype) {
110
111 switch (filetype) {
112 case 0xfc0: /* LinkFS */ 96 case 0xfc0: /* LinkFS */
113 return S_IFLNK|S_IRWXUGO; 97 return S_IFLNK|S_IRWXUGO;
114 98
@@ -174,50 +158,48 @@ adfs_mode2atts(struct super_block *sb, struct inode *inode)
174 158
175/* 159/*
176 * Convert an ADFS time to Unix time. ADFS has a 40-bit centi-second time 160 * Convert an ADFS time to Unix time. ADFS has a 40-bit centi-second time
177 * referenced to 1 Jan 1900 (til 2248) 161 * referenced to 1 Jan 1900 (til 2248) so we need to discard 2208988800 seconds
162 * of time to convert from RISC OS epoch to Unix epoch.
178 */ 163 */
179static void 164static void
180adfs_adfs2unix_time(struct timespec *tv, struct inode *inode) 165adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
181{ 166{
182 unsigned int high, low; 167 unsigned int high, low;
168 /* 01 Jan 1970 00:00:00 (Unix epoch) as nanoseconds since
169 * 01 Jan 1900 00:00:00 (RISC OS epoch)
170 */
171 static const s64 nsec_unix_epoch_diff_risc_os_epoch =
172 2208988800000000000LL;
173 s64 nsec;
183 174
184 if (ADFS_I(inode)->stamped == 0) 175 if (ADFS_I(inode)->stamped == 0)
185 goto cur_time; 176 goto cur_time;
186 177
187 high = ADFS_I(inode)->loadaddr << 24; 178 high = ADFS_I(inode)->loadaddr & 0xFF; /* top 8 bits of timestamp */
188 low = ADFS_I(inode)->execaddr; 179 low = ADFS_I(inode)->execaddr; /* bottom 32 bits of timestamp */
189 180
190 high |= low >> 8; 181 /* convert 40-bit centi-seconds to 32-bit seconds
191 low &= 255; 182 * going via nanoseconds to retain precision
183 */
184 nsec = (((s64) high << 32) | (s64) low) * 10000000; /* cs to ns */
192 185
193 /* Files dated pre 01 Jan 1970 00:00:00. */ 186 /* Files dated pre 01 Jan 1970 00:00:00. */
194 if (high < 0x336e996a) 187 if (nsec < nsec_unix_epoch_diff_risc_os_epoch)
195 goto too_early; 188 goto too_early;
196 189
197 /* Files dated post 18 Jan 2038 03:14:05. */ 190 /* convert from RISC OS to Unix epoch */
198 if (high >= 0x656e9969) 191 nsec -= nsec_unix_epoch_diff_risc_os_epoch;
199 goto too_late;
200 192
201 /* discard 2208988800 (0x336e996a00) seconds of time */ 193 *tv = ns_to_timespec(nsec);
202 high -= 0x336e996a;
203
204 /* convert 40-bit centi-seconds to 32-bit seconds */
205 tv->tv_sec = (((high % 100) << 8) + low) / 100 + (high / 100 << 8);
206 tv->tv_nsec = 0;
207 return; 194 return;
208 195
209 cur_time: 196 cur_time:
210 *tv = CURRENT_TIME_SEC; 197 *tv = CURRENT_TIME;
211 return; 198 return;
212 199
213 too_early: 200 too_early:
214 tv->tv_sec = tv->tv_nsec = 0; 201 tv->tv_sec = tv->tv_nsec = 0;
215 return; 202 return;
216
217 too_late:
218 tv->tv_sec = 0x7ffffffd;
219 tv->tv_nsec = 0;
220 return;
221} 203}
222 204
223/* 205/*
@@ -279,7 +261,8 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
279 ADFS_I(inode)->loadaddr = obj->loadaddr; 261 ADFS_I(inode)->loadaddr = obj->loadaddr;
280 ADFS_I(inode)->execaddr = obj->execaddr; 262 ADFS_I(inode)->execaddr = obj->execaddr;
281 ADFS_I(inode)->attr = obj->attr; 263 ADFS_I(inode)->attr = obj->attr;
282 ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000); 264 ADFS_I(inode)->filetype = obj->filetype;
265 ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000);
283 266
284 inode->i_mode = adfs_atts2mode(sb, inode); 267 inode->i_mode = adfs_atts2mode(sb, inode);
285 adfs_adfs2unix_time(&inode->i_mtime, inode); 268 adfs_adfs2unix_time(&inode->i_mtime, inode);
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 06d7388b477b..c8bf36a1996a 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -138,17 +138,20 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
138 seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); 138 seq_printf(seq, ",ownmask=%o", asb->s_owner_mask);
139 if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) 139 if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK)
140 seq_printf(seq, ",othmask=%o", asb->s_other_mask); 140 seq_printf(seq, ",othmask=%o", asb->s_other_mask);
141 if (asb->s_ftsuffix != 0)
142 seq_printf(seq, ",ftsuffix=%u", asb->s_ftsuffix);
141 143
142 return 0; 144 return 0;
143} 145}
144 146
145enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; 147enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_ftsuffix, Opt_err};
146 148
147static const match_table_t tokens = { 149static const match_table_t tokens = {
148 {Opt_uid, "uid=%u"}, 150 {Opt_uid, "uid=%u"},
149 {Opt_gid, "gid=%u"}, 151 {Opt_gid, "gid=%u"},
150 {Opt_ownmask, "ownmask=%o"}, 152 {Opt_ownmask, "ownmask=%o"},
151 {Opt_othmask, "othmask=%o"}, 153 {Opt_othmask, "othmask=%o"},
154 {Opt_ftsuffix, "ftsuffix=%u"},
152 {Opt_err, NULL} 155 {Opt_err, NULL}
153}; 156};
154 157
@@ -189,6 +192,11 @@ static int parse_options(struct super_block *sb, char *options)
189 return -EINVAL; 192 return -EINVAL;
190 asb->s_other_mask = option; 193 asb->s_other_mask = option;
191 break; 194 break;
195 case Opt_ftsuffix:
196 if (match_int(args, &option))
197 return -EINVAL;
198 asb->s_ftsuffix = option;
199 break;
192 default: 200 default:
193 printk("ADFS-fs: unrecognised mount option \"%s\" " 201 printk("ADFS-fs: unrecognised mount option \"%s\" "
194 "or missing value\n", p); 202 "or missing value\n", p);
@@ -366,6 +374,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
366 asb->s_gid = 0; 374 asb->s_gid = 0;
367 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; 375 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK;
368 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; 376 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK;
377 asb->s_ftsuffix = 0;
369 378
370 if (parse_options(sb, data)) 379 if (parse_options(sb, data))
371 goto error; 380 goto error;
@@ -445,11 +454,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
445 454
446 root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root); 455 root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root);
447 root_obj.name_len = 0; 456 root_obj.name_len = 0;
448 root_obj.loadaddr = 0; 457 /* Set root object date as 01 Jan 1987 00:00:00 */
449 root_obj.execaddr = 0; 458 root_obj.loadaddr = 0xfff0003f;
459 root_obj.execaddr = 0xec22c000;
450 root_obj.size = ADFS_NEWDIR_SIZE; 460 root_obj.size = ADFS_NEWDIR_SIZE;
451 root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ | 461 root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ |
452 ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ; 462 ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ;
463 root_obj.filetype = -1;
453 464
454 /* 465 /*
455 * If this is a F+ disk with variable length directories, 466 * If this is a F+ disk with variable length directories,
@@ -463,6 +474,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
463 asb->s_dir = &adfs_f_dir_ops; 474 asb->s_dir = &adfs_f_dir_ops;
464 asb->s_namelen = ADFS_F_NAME_LEN; 475 asb->s_namelen = ADFS_F_NAME_LEN;
465 } 476 }
477 /*
478 * ,xyz hex filetype suffix may be added by driver
479 * to files that have valid RISC OS filetype
480 */
481 if (asb->s_ftsuffix)
482 asb->s_namelen += 4;
466 483
467 sb->s_d_op = &adfs_dentry_operations; 484 sb->s_d_op = &adfs_dentry_operations;
468 root = adfs_iget(sb, &root_obj); 485 root = adfs_iget(sb, &root_obj);
diff --git a/fs/affs/Makefile b/fs/affs/Makefile
index b2c4f54446f3..3988b4a78339 100644
--- a/fs/affs/Makefile
+++ b/fs/affs/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the Linux affs filesystem routines. 2# Makefile for the Linux affs filesystem routines.
3# 3#
4 4
5#EXTRA_CFLAGS=-DDEBUG=1 5#ccflags-y := -DDEBUG=1
6 6
7obj-$(CONFIG_AFFS_FS) += affs.o 7obj-$(CONFIG_AFFS_FS) += affs.o
8 8
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0a90dcd46de2..acf321b70fcd 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -429,7 +429,6 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
429const struct address_space_operations affs_aops = { 429const struct address_space_operations affs_aops = {
430 .readpage = affs_readpage, 430 .readpage = affs_readpage,
431 .writepage = affs_writepage, 431 .writepage = affs_writepage,
432 .sync_page = block_sync_page,
433 .write_begin = affs_write_begin, 432 .write_begin = affs_write_begin,
434 .write_end = generic_write_end, 433 .write_end = generic_write_end,
435 .bmap = _affs_bmap 434 .bmap = _affs_bmap
@@ -786,7 +785,6 @@ out:
786const struct address_space_operations affs_aops_ofs = { 785const struct address_space_operations affs_aops_ofs = {
787 .readpage = affs_readpage_ofs, 786 .readpage = affs_readpage_ofs,
788 //.writepage = affs_writepage_ofs, 787 //.writepage = affs_writepage_ofs,
789 //.sync_page = affs_sync_page_ofs,
790 .write_begin = affs_write_begin_ofs, 788 .write_begin = affs_write_begin_ofs,
791 .write_end = affs_write_end_ofs 789 .write_end = affs_write_end_ofs
792}; 790};
diff --git a/fs/aio.c b/fs/aio.c
index 7f54f43b8f7c..e29ec485af25 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -34,8 +34,6 @@
34#include <linux/security.h> 34#include <linux/security.h>
35#include <linux/eventfd.h> 35#include <linux/eventfd.h>
36#include <linux/blkdev.h> 36#include <linux/blkdev.h>
37#include <linux/mempool.h>
38#include <linux/hash.h>
39#include <linux/compat.h> 37#include <linux/compat.h>
40 38
41#include <asm/kmap_types.h> 39#include <asm/kmap_types.h>
@@ -65,14 +63,6 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
65static DEFINE_SPINLOCK(fput_lock); 63static DEFINE_SPINLOCK(fput_lock);
66static LIST_HEAD(fput_head); 64static LIST_HEAD(fput_head);
67 65
68#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
69#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
70struct aio_batch_entry {
71 struct hlist_node list;
72 struct address_space *mapping;
73};
74mempool_t *abe_pool;
75
76static void aio_kick_handler(struct work_struct *); 66static void aio_kick_handler(struct work_struct *);
77static void aio_queue_work(struct kioctx *); 67static void aio_queue_work(struct kioctx *);
78 68
@@ -86,8 +76,7 @@ static int __init aio_setup(void)
86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 76 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
87 77
88 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */ 78 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); 79 BUG_ON(!aio_wq);
90 BUG_ON(!aio_wq || !abe_pool);
91 80
92 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); 81 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
93 82
@@ -520,7 +509,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
520 ctx->reqs_active--; 509 ctx->reqs_active--;
521 510
522 if (unlikely(!ctx->reqs_active && ctx->dead)) 511 if (unlikely(!ctx->reqs_active && ctx->dead))
523 wake_up(&ctx->wait); 512 wake_up_all(&ctx->wait);
524} 513}
525 514
526static void aio_fput_routine(struct work_struct *data) 515static void aio_fput_routine(struct work_struct *data)
@@ -1229,7 +1218,7 @@ static void io_destroy(struct kioctx *ioctx)
1229 * by other CPUs at this point. Right now, we rely on the 1218 * by other CPUs at this point. Right now, we rely on the
1230 * locking done by the above calls to ensure this consistency. 1219 * locking done by the above calls to ensure this consistency.
1231 */ 1220 */
1232 wake_up(&ioctx->wait); 1221 wake_up_all(&ioctx->wait);
1233 put_ioctx(ioctx); /* once for the lookup */ 1222 put_ioctx(ioctx); /* once for the lookup */
1234} 1223}
1235 1224
@@ -1525,57 +1514,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
1525 return 0; 1514 return 0;
1526} 1515}
1527 1516
1528static void aio_batch_add(struct address_space *mapping,
1529 struct hlist_head *batch_hash)
1530{
1531 struct aio_batch_entry *abe;
1532 struct hlist_node *pos;
1533 unsigned bucket;
1534
1535 bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
1536 hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
1537 if (abe->mapping == mapping)
1538 return;
1539 }
1540
1541 abe = mempool_alloc(abe_pool, GFP_KERNEL);
1542
1543 /*
1544 * we should be using igrab here, but
1545 * we don't want to hammer on the global
1546 * inode spinlock just to take an extra
1547 * reference on a file that we must already
1548 * have a reference to.
1549 *
1550 * When we're called, we always have a reference
1551 * on the file, so we must always have a reference
1552 * on the inode, so ihold() is safe here.
1553 */
1554 ihold(mapping->host);
1555 abe->mapping = mapping;
1556 hlist_add_head(&abe->list, &batch_hash[bucket]);
1557 return;
1558}
1559
1560static void aio_batch_free(struct hlist_head *batch_hash)
1561{
1562 struct aio_batch_entry *abe;
1563 struct hlist_node *pos, *n;
1564 int i;
1565
1566 for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
1567 hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
1568 blk_run_address_space(abe->mapping);
1569 iput(abe->mapping->host);
1570 hlist_del(&abe->list);
1571 mempool_free(abe, abe_pool);
1572 }
1573 }
1574}
1575
1576static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1517static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1577 struct iocb *iocb, struct hlist_head *batch_hash, 1518 struct iocb *iocb, bool compat)
1578 bool compat)
1579{ 1519{
1580 struct kiocb *req; 1520 struct kiocb *req;
1581 struct file *file; 1521 struct file *file;
@@ -1666,11 +1606,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1666 ; 1606 ;
1667 } 1607 }
1668 spin_unlock_irq(&ctx->ctx_lock); 1608 spin_unlock_irq(&ctx->ctx_lock);
1669 if (req->ki_opcode == IOCB_CMD_PREAD ||
1670 req->ki_opcode == IOCB_CMD_PREADV ||
1671 req->ki_opcode == IOCB_CMD_PWRITE ||
1672 req->ki_opcode == IOCB_CMD_PWRITEV)
1673 aio_batch_add(file->f_mapping, batch_hash);
1674 1609
1675 aio_put_req(req); /* drop extra ref to req */ 1610 aio_put_req(req); /* drop extra ref to req */
1676 return 0; 1611 return 0;
@@ -1687,7 +1622,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1687 struct kioctx *ctx; 1622 struct kioctx *ctx;
1688 long ret = 0; 1623 long ret = 0;
1689 int i; 1624 int i;
1690 struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, }; 1625 struct blk_plug plug;
1691 1626
1692 if (unlikely(nr < 0)) 1627 if (unlikely(nr < 0))
1693 return -EINVAL; 1628 return -EINVAL;
@@ -1704,6 +1639,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1704 return -EINVAL; 1639 return -EINVAL;
1705 } 1640 }
1706 1641
1642 blk_start_plug(&plug);
1643
1707 /* 1644 /*
1708 * AKPM: should this return a partial result if some of the IOs were 1645 * AKPM: should this return a partial result if some of the IOs were
1709 * successfully submitted? 1646 * successfully submitted?
@@ -1722,11 +1659,11 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1722 break; 1659 break;
1723 } 1660 }
1724 1661
1725 ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat); 1662 ret = io_submit_one(ctx, user_iocb, &tmp, compat);
1726 if (ret) 1663 if (ret)
1727 break; 1664 break;
1728 } 1665 }
1729 aio_batch_free(batch_hash); 1666 blk_finish_plug(&plug);
1730 1667
1731 put_ioctx(ctx); 1668 put_ioctx(ctx);
1732 return i ? i : ret; 1669 return i ? i : ret;
diff --git a/fs/attr.c b/fs/attr.c
index 7ca41811afa1..1007ed616314 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -59,7 +59,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
59 59
60 /* Make sure a caller can chmod. */ 60 /* Make sure a caller can chmod. */
61 if (ia_valid & ATTR_MODE) { 61 if (ia_valid & ATTR_MODE) {
62 if (!is_owner_or_cap(inode)) 62 if (!inode_owner_or_capable(inode))
63 return -EPERM; 63 return -EPERM;
64 /* Also check the setgid bit! */ 64 /* Also check the setgid bit! */
65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
@@ -69,7 +69,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
69 69
70 /* Check for setting the inode time. */ 70 /* Check for setting the inode time. */
71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { 71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
72 if (!is_owner_or_cap(inode)) 72 if (!inode_owner_or_capable(inode))
73 return -EPERM; 73 return -EPERM;
74 } 74 }
75 75
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 54f923792728..475f9c597cb7 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -61,8 +61,6 @@ do { \
61 current->pid, __func__, ##args); \ 61 current->pid, __func__, ##args); \
62} while (0) 62} while (0)
63 63
64extern spinlock_t autofs4_lock;
65
66/* Unified info structure. This is pointed to by both the dentry and 64/* Unified info structure. This is pointed to by both the dentry and
67 inode structures. Each file in the filesystem has an instance of this 65 inode structures. Each file in the filesystem has an instance of this
68 structure. It holds a reference to the dentry, so dentries are never 66 structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1442da4860e5..509fe1eb66ae 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -372,6 +372,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
372 return -EBUSY; 372 return -EBUSY;
373 } else { 373 } else {
374 struct file *pipe = fget(pipefd); 374 struct file *pipe = fget(pipefd);
375 if (!pipe) {
376 err = -EBADF;
377 goto out;
378 }
375 if (!pipe->f_op || !pipe->f_op->write) { 379 if (!pipe->f_op || !pipe->f_op->write) {
376 err = -EPIPE; 380 err = -EPIPE;
377 fput(pipe); 381 fput(pipe);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index f43100b9662b..450f529a4eae 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -87,18 +87,70 @@ done:
87} 87}
88 88
89/* 89/*
90 * Calculate and dget next entry in the subdirs list under root.
91 */
92static struct dentry *get_next_positive_subdir(struct dentry *prev,
93 struct dentry *root)
94{
95 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
96 struct list_head *next;
97 struct dentry *p, *q;
98
99 spin_lock(&sbi->lookup_lock);
100
101 if (prev == NULL) {
102 spin_lock(&root->d_lock);
103 prev = dget_dlock(root);
104 next = prev->d_subdirs.next;
105 p = prev;
106 goto start;
107 }
108
109 p = prev;
110 spin_lock(&p->d_lock);
111again:
112 next = p->d_u.d_child.next;
113start:
114 if (next == &root->d_subdirs) {
115 spin_unlock(&p->d_lock);
116 spin_unlock(&sbi->lookup_lock);
117 dput(prev);
118 return NULL;
119 }
120
121 q = list_entry(next, struct dentry, d_u.d_child);
122
123 spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
124 /* Negative dentry - try next */
125 if (!simple_positive(q)) {
126 spin_unlock(&p->d_lock);
127 p = q;
128 goto again;
129 }
130 dget_dlock(q);
131 spin_unlock(&q->d_lock);
132 spin_unlock(&p->d_lock);
133 spin_unlock(&sbi->lookup_lock);
134
135 dput(prev);
136
137 return q;
138}
139
140/*
90 * Calculate and dget next entry in top down tree traversal. 141 * Calculate and dget next entry in top down tree traversal.
91 */ 142 */
92static struct dentry *get_next_positive_dentry(struct dentry *prev, 143static struct dentry *get_next_positive_dentry(struct dentry *prev,
93 struct dentry *root) 144 struct dentry *root)
94{ 145{
146 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
95 struct list_head *next; 147 struct list_head *next;
96 struct dentry *p, *ret; 148 struct dentry *p, *ret;
97 149
98 if (prev == NULL) 150 if (prev == NULL)
99 return dget(root); 151 return dget(root);
100 152
101 spin_lock(&autofs4_lock); 153 spin_lock(&sbi->lookup_lock);
102relock: 154relock:
103 p = prev; 155 p = prev;
104 spin_lock(&p->d_lock); 156 spin_lock(&p->d_lock);
@@ -110,7 +162,7 @@ again:
110 162
111 if (p == root) { 163 if (p == root) {
112 spin_unlock(&p->d_lock); 164 spin_unlock(&p->d_lock);
113 spin_unlock(&autofs4_lock); 165 spin_unlock(&sbi->lookup_lock);
114 dput(prev); 166 dput(prev);
115 return NULL; 167 return NULL;
116 } 168 }
@@ -140,7 +192,7 @@ again:
140 dget_dlock(ret); 192 dget_dlock(ret);
141 spin_unlock(&ret->d_lock); 193 spin_unlock(&ret->d_lock);
142 spin_unlock(&p->d_lock); 194 spin_unlock(&p->d_lock);
143 spin_unlock(&autofs4_lock); 195 spin_unlock(&sbi->lookup_lock);
144 196
145 dput(prev); 197 dput(prev);
146 198
@@ -290,11 +342,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
290 spin_lock(&sbi->fs_lock); 342 spin_lock(&sbi->fs_lock);
291 ino = autofs4_dentry_ino(root); 343 ino = autofs4_dentry_ino(root);
292 /* No point expiring a pending mount */ 344 /* No point expiring a pending mount */
293 if (ino->flags & AUTOFS_INF_PENDING) { 345 if (ino->flags & AUTOFS_INF_PENDING)
294 spin_unlock(&sbi->fs_lock); 346 goto out;
295 return NULL;
296 }
297 managed_dentry_set_transit(root);
298 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 347 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
299 struct autofs_info *ino = autofs4_dentry_ino(root); 348 struct autofs_info *ino = autofs4_dentry_ino(root);
300 ino->flags |= AUTOFS_INF_EXPIRING; 349 ino->flags |= AUTOFS_INF_EXPIRING;
@@ -302,7 +351,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
302 spin_unlock(&sbi->fs_lock); 351 spin_unlock(&sbi->fs_lock);
303 return root; 352 return root;
304 } 353 }
305 managed_dentry_clear_transit(root); 354out:
306 spin_unlock(&sbi->fs_lock); 355 spin_unlock(&sbi->fs_lock);
307 dput(root); 356 dput(root);
308 357
@@ -336,13 +385,12 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
336 timeout = sbi->exp_timeout; 385 timeout = sbi->exp_timeout;
337 386
338 dentry = NULL; 387 dentry = NULL;
339 while ((dentry = get_next_positive_dentry(dentry, root))) { 388 while ((dentry = get_next_positive_subdir(dentry, root))) {
340 spin_lock(&sbi->fs_lock); 389 spin_lock(&sbi->fs_lock);
341 ino = autofs4_dentry_ino(dentry); 390 ino = autofs4_dentry_ino(dentry);
342 /* No point expiring a pending mount */ 391 /* No point expiring a pending mount */
343 if (ino->flags & AUTOFS_INF_PENDING) 392 if (ino->flags & AUTOFS_INF_PENDING)
344 goto cont; 393 goto next;
345 managed_dentry_set_transit(dentry);
346 394
347 /* 395 /*
348 * Case 1: (i) indirect mount or top level pseudo direct mount 396 * Case 1: (i) indirect mount or top level pseudo direct mount
@@ -402,8 +450,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
402 } 450 }
403 } 451 }
404next: 452next:
405 managed_dentry_clear_transit(dentry);
406cont:
407 spin_unlock(&sbi->fs_lock); 453 spin_unlock(&sbi->fs_lock);
408 } 454 }
409 return NULL; 455 return NULL;
@@ -415,13 +461,13 @@ found:
415 ino->flags |= AUTOFS_INF_EXPIRING; 461 ino->flags |= AUTOFS_INF_EXPIRING;
416 init_completion(&ino->expire_complete); 462 init_completion(&ino->expire_complete);
417 spin_unlock(&sbi->fs_lock); 463 spin_unlock(&sbi->fs_lock);
418 spin_lock(&autofs4_lock); 464 spin_lock(&sbi->lookup_lock);
419 spin_lock(&expired->d_parent->d_lock); 465 spin_lock(&expired->d_parent->d_lock);
420 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); 466 spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
421 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 467 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
422 spin_unlock(&expired->d_lock); 468 spin_unlock(&expired->d_lock);
423 spin_unlock(&expired->d_parent->d_lock); 469 spin_unlock(&expired->d_parent->d_lock);
424 spin_unlock(&autofs4_lock); 470 spin_unlock(&sbi->lookup_lock);
425 return expired; 471 return expired;
426} 472}
427 473
@@ -484,8 +530,6 @@ int autofs4_expire_run(struct super_block *sb,
484 spin_lock(&sbi->fs_lock); 530 spin_lock(&sbi->fs_lock);
485 ino = autofs4_dentry_ino(dentry); 531 ino = autofs4_dentry_ino(dentry);
486 ino->flags &= ~AUTOFS_INF_EXPIRING; 532 ino->flags &= ~AUTOFS_INF_EXPIRING;
487 if (!d_unhashed(dentry))
488 managed_dentry_clear_transit(dentry);
489 complete_all(&ino->expire_complete); 533 complete_all(&ino->expire_complete);
490 spin_unlock(&sbi->fs_lock); 534 spin_unlock(&sbi->fs_lock);
491 535
@@ -513,9 +557,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
513 spin_lock(&sbi->fs_lock); 557 spin_lock(&sbi->fs_lock);
514 ino->flags &= ~AUTOFS_INF_EXPIRING; 558 ino->flags &= ~AUTOFS_INF_EXPIRING;
515 spin_lock(&dentry->d_lock); 559 spin_lock(&dentry->d_lock);
516 if (ret) 560 if (!ret) {
517 __managed_dentry_clear_transit(dentry);
518 else {
519 if ((IS_ROOT(dentry) || 561 if ((IS_ROOT(dentry) ||
520 (autofs_type_indirect(sbi->type) && 562 (autofs_type_indirect(sbi->type) &&
521 IS_ROOT(dentry->d_parent))) && 563 IS_ROOT(dentry->d_parent))) &&
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e6f84d26f4cf..96804a17bbd0 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,8 +23,6 @@
23 23
24#include "autofs_i.h" 24#include "autofs_i.h"
25 25
26DEFINE_SPINLOCK(autofs4_lock);
27
28static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
29static int autofs4_dir_unlink(struct inode *,struct dentry *); 27static int autofs4_dir_unlink(struct inode *,struct dentry *);
30static int autofs4_dir_rmdir(struct inode *,struct dentry *); 28static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -125,15 +123,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
125 * autofs file system so just let the libfs routines handle 123 * autofs file system so just let the libfs routines handle
126 * it. 124 * it.
127 */ 125 */
128 spin_lock(&autofs4_lock); 126 spin_lock(&sbi->lookup_lock);
129 spin_lock(&dentry->d_lock); 127 spin_lock(&dentry->d_lock);
130 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { 128 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
131 spin_unlock(&dentry->d_lock); 129 spin_unlock(&dentry->d_lock);
132 spin_unlock(&autofs4_lock); 130 spin_unlock(&sbi->lookup_lock);
133 return -ENOENT; 131 return -ENOENT;
134 } 132 }
135 spin_unlock(&dentry->d_lock); 133 spin_unlock(&dentry->d_lock);
136 spin_unlock(&autofs4_lock); 134 spin_unlock(&sbi->lookup_lock);
137 135
138out: 136out:
139 return dcache_dir_open(inode, file); 137 return dcache_dir_open(inode, file);
@@ -171,7 +169,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
171 const unsigned char *str = name->name; 169 const unsigned char *str = name->name;
172 struct list_head *p, *head; 170 struct list_head *p, *head;
173 171
174 spin_lock(&autofs4_lock);
175 spin_lock(&sbi->lookup_lock); 172 spin_lock(&sbi->lookup_lock);
176 head = &sbi->active_list; 173 head = &sbi->active_list;
177 list_for_each(p, head) { 174 list_for_each(p, head) {
@@ -204,14 +201,12 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
204 dget_dlock(active); 201 dget_dlock(active);
205 spin_unlock(&active->d_lock); 202 spin_unlock(&active->d_lock);
206 spin_unlock(&sbi->lookup_lock); 203 spin_unlock(&sbi->lookup_lock);
207 spin_unlock(&autofs4_lock);
208 return active; 204 return active;
209 } 205 }
210next: 206next:
211 spin_unlock(&active->d_lock); 207 spin_unlock(&active->d_lock);
212 } 208 }
213 spin_unlock(&sbi->lookup_lock); 209 spin_unlock(&sbi->lookup_lock);
214 spin_unlock(&autofs4_lock);
215 210
216 return NULL; 211 return NULL;
217} 212}
@@ -226,7 +221,6 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
226 const unsigned char *str = name->name; 221 const unsigned char *str = name->name;
227 struct list_head *p, *head; 222 struct list_head *p, *head;
228 223
229 spin_lock(&autofs4_lock);
230 spin_lock(&sbi->lookup_lock); 224 spin_lock(&sbi->lookup_lock);
231 head = &sbi->expiring_list; 225 head = &sbi->expiring_list;
232 list_for_each(p, head) { 226 list_for_each(p, head) {
@@ -259,14 +253,12 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
259 dget_dlock(expiring); 253 dget_dlock(expiring);
260 spin_unlock(&expiring->d_lock); 254 spin_unlock(&expiring->d_lock);
261 spin_unlock(&sbi->lookup_lock); 255 spin_unlock(&sbi->lookup_lock);
262 spin_unlock(&autofs4_lock);
263 return expiring; 256 return expiring;
264 } 257 }
265next: 258next:
266 spin_unlock(&expiring->d_lock); 259 spin_unlock(&expiring->d_lock);
267 } 260 }
268 spin_unlock(&sbi->lookup_lock); 261 spin_unlock(&sbi->lookup_lock);
269 spin_unlock(&autofs4_lock);
270 262
271 return NULL; 263 return NULL;
272} 264}
@@ -275,17 +267,16 @@ static int autofs4_mount_wait(struct dentry *dentry)
275{ 267{
276 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 268 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
277 struct autofs_info *ino = autofs4_dentry_ino(dentry); 269 struct autofs_info *ino = autofs4_dentry_ino(dentry);
278 int status; 270 int status = 0;
279 271
280 if (ino->flags & AUTOFS_INF_PENDING) { 272 if (ino->flags & AUTOFS_INF_PENDING) {
281 DPRINTK("waiting for mount name=%.*s", 273 DPRINTK("waiting for mount name=%.*s",
282 dentry->d_name.len, dentry->d_name.name); 274 dentry->d_name.len, dentry->d_name.name);
283 status = autofs4_wait(sbi, dentry, NFY_MOUNT); 275 status = autofs4_wait(sbi, dentry, NFY_MOUNT);
284 DPRINTK("mount wait done status=%d", status); 276 DPRINTK("mount wait done status=%d", status);
285 ino->last_used = jiffies;
286 return status;
287 } 277 }
288 return 0; 278 ino->last_used = jiffies;
279 return status;
289} 280}
290 281
291static int do_expire_wait(struct dentry *dentry) 282static int do_expire_wait(struct dentry *dentry)
@@ -319,9 +310,12 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
319 */ 310 */
320 if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { 311 if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
321 struct dentry *parent = dentry->d_parent; 312 struct dentry *parent = dentry->d_parent;
313 struct autofs_info *ino;
322 struct dentry *new = d_lookup(parent, &dentry->d_name); 314 struct dentry *new = d_lookup(parent, &dentry->d_name);
323 if (!new) 315 if (!new)
324 return NULL; 316 return NULL;
317 ino = autofs4_dentry_ino(new);
318 ino->last_used = jiffies;
325 dput(path->dentry); 319 dput(path->dentry);
326 path->dentry = new; 320 path->dentry = new;
327 } 321 }
@@ -338,18 +332,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
338 DPRINTK("dentry=%p %.*s", 332 DPRINTK("dentry=%p %.*s",
339 dentry, dentry->d_name.len, dentry->d_name.name); 333 dentry, dentry->d_name.len, dentry->d_name.name);
340 334
341 /*
342 * Someone may have manually umounted this or it was a submount
343 * that has gone away.
344 */
345 spin_lock(&dentry->d_lock);
346 if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
347 if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
348 (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
349 __managed_dentry_set_transit(path->dentry);
350 }
351 spin_unlock(&dentry->d_lock);
352
353 /* The daemon never triggers a mount. */ 335 /* The daemon never triggers a mount. */
354 if (autofs4_oz_mode(sbi)) 336 if (autofs4_oz_mode(sbi))
355 return NULL; 337 return NULL;
@@ -418,18 +400,17 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
418done: 400done:
419 if (!(ino->flags & AUTOFS_INF_EXPIRING)) { 401 if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
420 /* 402 /*
421 * Any needed mounting has been completed and the path updated 403 * Any needed mounting has been completed and the path
422 * so turn this into a normal dentry so we don't continually 404 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
423 * call ->d_automount() and ->d_manage(). 405 * call ->d_automount() on rootless multi-mounts since
424 */ 406 * it can lead to an incorrect ELOOP error return.
425 spin_lock(&dentry->d_lock); 407 *
426 __managed_dentry_clear_transit(dentry);
427 /*
428 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and 408 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
429 * symlinks as in all other cases the dentry will be covered by 409 * symlinks as in all other cases the dentry will be covered by
430 * an actual mount so ->d_automount() won't be called during 410 * an actual mount so ->d_automount() won't be called during
431 * the follow. 411 * the follow.
432 */ 412 */
413 spin_lock(&dentry->d_lock);
433 if ((!d_mountpoint(dentry) && 414 if ((!d_mountpoint(dentry) &&
434 !list_empty(&dentry->d_subdirs)) || 415 !list_empty(&dentry->d_subdirs)) ||
435 (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))) 416 (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
@@ -455,6 +436,8 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
455 436
456 /* The daemon never waits. */ 437 /* The daemon never waits. */
457 if (autofs4_oz_mode(sbi)) { 438 if (autofs4_oz_mode(sbi)) {
439 if (rcu_walk)
440 return 0;
458 if (!d_mountpoint(dentry)) 441 if (!d_mountpoint(dentry))
459 return -EISDIR; 442 return -EISDIR;
460 return 0; 443 return 0;
@@ -612,12 +595,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
612 595
613 dir->i_mtime = CURRENT_TIME; 596 dir->i_mtime = CURRENT_TIME;
614 597
615 spin_lock(&autofs4_lock); 598 spin_lock(&sbi->lookup_lock);
616 autofs4_add_expiring(dentry); 599 __autofs4_add_expiring(dentry);
617 spin_lock(&dentry->d_lock); 600 spin_lock(&dentry->d_lock);
618 __d_drop(dentry); 601 __d_drop(dentry);
619 spin_unlock(&dentry->d_lock); 602 spin_unlock(&dentry->d_lock);
620 spin_unlock(&autofs4_lock); 603 spin_unlock(&sbi->lookup_lock);
621 604
622 return 0; 605 return 0;
623} 606}
@@ -686,20 +669,17 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
686 if (!autofs4_oz_mode(sbi)) 669 if (!autofs4_oz_mode(sbi))
687 return -EACCES; 670 return -EACCES;
688 671
689 spin_lock(&autofs4_lock);
690 spin_lock(&sbi->lookup_lock); 672 spin_lock(&sbi->lookup_lock);
691 spin_lock(&dentry->d_lock); 673 spin_lock(&dentry->d_lock);
692 if (!list_empty(&dentry->d_subdirs)) { 674 if (!list_empty(&dentry->d_subdirs)) {
693 spin_unlock(&dentry->d_lock); 675 spin_unlock(&dentry->d_lock);
694 spin_unlock(&sbi->lookup_lock); 676 spin_unlock(&sbi->lookup_lock);
695 spin_unlock(&autofs4_lock);
696 return -ENOTEMPTY; 677 return -ENOTEMPTY;
697 } 678 }
698 __autofs4_add_expiring(dentry); 679 __autofs4_add_expiring(dentry);
699 spin_unlock(&sbi->lookup_lock);
700 __d_drop(dentry); 680 __d_drop(dentry);
701 spin_unlock(&dentry->d_lock); 681 spin_unlock(&dentry->d_lock);
702 spin_unlock(&autofs4_lock); 682 spin_unlock(&sbi->lookup_lock);
703 683
704 if (sbi->version < 5) 684 if (sbi->version < 5)
705 autofs_clear_leaf_automount_flags(dentry); 685 autofs_clear_leaf_automount_flags(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 56010056b2e6..25435987d6ae 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -197,12 +197,12 @@ rename_retry:
197 197
198 seq = read_seqbegin(&rename_lock); 198 seq = read_seqbegin(&rename_lock);
199 rcu_read_lock(); 199 rcu_read_lock();
200 spin_lock(&autofs4_lock); 200 spin_lock(&sbi->fs_lock);
201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
202 len += tmp->d_name.len + 1; 202 len += tmp->d_name.len + 1;
203 203
204 if (!len || --len > NAME_MAX) { 204 if (!len || --len > NAME_MAX) {
205 spin_unlock(&autofs4_lock); 205 spin_unlock(&sbi->fs_lock);
206 rcu_read_unlock(); 206 rcu_read_unlock();
207 if (read_seqretry(&rename_lock, seq)) 207 if (read_seqretry(&rename_lock, seq))
208 goto rename_retry; 208 goto rename_retry;
@@ -218,7 +218,7 @@ rename_retry:
218 p -= tmp->d_name.len; 218 p -= tmp->d_name.len;
219 strncpy(p, tmp->d_name.name, tmp->d_name.len); 219 strncpy(p, tmp->d_name.name, tmp->d_name.len);
220 } 220 }
221 spin_unlock(&autofs4_lock); 221 spin_unlock(&sbi->fs_lock);
222 rcu_read_unlock(); 222 rcu_read_unlock();
223 if (read_seqretry(&rename_lock, seq)) 223 if (read_seqretry(&rename_lock, seq))
224 goto rename_retry; 224 goto rename_retry;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b1d0c794747b..06457ed8f3e7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -75,7 +75,6 @@ static const struct inode_operations befs_dir_inode_operations = {
75 75
76static const struct address_space_operations befs_aops = { 76static const struct address_space_operations befs_aops = {
77 .readpage = befs_readpage, 77 .readpage = befs_readpage,
78 .sync_page = block_sync_page,
79 .bmap = befs_bmap, 78 .bmap = befs_bmap,
80}; 79};
81 80
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 685ecff3ab31..b14cebfd9047 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -97,7 +97,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
97 if (!inode) 97 if (!inode)
98 return -ENOSPC; 98 return -ENOSPC;
99 mutex_lock(&info->bfs_lock); 99 mutex_lock(&info->bfs_lock);
100 ino = find_first_zero_bit(info->si_imap, info->si_lasti); 100 ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1);
101 if (ino > info->si_lasti) { 101 if (ino > info->si_lasti) {
102 mutex_unlock(&info->bfs_lock); 102 mutex_unlock(&info->bfs_lock);
103 iput(inode); 103 iput(inode);
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index eb67edd0f8ea..f20e8a71062f 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -186,7 +186,6 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
186const struct address_space_operations bfs_aops = { 186const struct address_space_operations bfs_aops = {
187 .readpage = bfs_readpage, 187 .readpage = bfs_readpage,
188 .writepage = bfs_writepage, 188 .writepage = bfs_writepage,
189 .sync_page = block_sync_page,
190 .write_begin = bfs_write_begin, 189 .write_begin = bfs_write_begin,
191 .write_end = generic_write_end, 190 .write_end = generic_write_end,
192 .bmap = bfs_bmap, 191 .bmap = bfs_bmap,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d5b640ba6cb1..f34078d702d3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -570,7 +570,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
570 unsigned long elf_entry; 570 unsigned long elf_entry;
571 unsigned long interp_load_addr = 0; 571 unsigned long interp_load_addr = 0;
572 unsigned long start_code, end_code, start_data, end_data; 572 unsigned long start_code, end_code, start_data, end_data;
573 unsigned long reloc_func_desc = 0; 573 unsigned long reloc_func_desc __maybe_unused = 0;
574 int executable_stack = EXSTACK_DEFAULT; 574 int executable_stack = EXSTACK_DEFAULT;
575 unsigned long def_flags = 0; 575 unsigned long def_flags = 0;
576 struct { 576 struct {
@@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm)
1906 segs = current->mm->map_count; 1906 segs = current->mm->map_count;
1907 segs += elf_core_extra_phdrs(); 1907 segs += elf_core_extra_phdrs();
1908 1908
1909 gate_vma = get_gate_vma(current); 1909 gate_vma = get_gate_vma(current->mm);
1910 if (gate_vma != NULL) 1910 if (gate_vma != NULL)
1911 segs++; 1911 segs++;
1912 1912
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e49cce234c65..9c5e6b2cd11a 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -761,6 +761,9 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
761{ 761{
762 unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); 762 unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
763 763
764 if (bs->bio_integrity_pool)
765 return 0;
766
764 bs->bio_integrity_pool = 767 bs->bio_integrity_pool =
765 mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); 768 mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
766 769
diff --git a/fs/bio.c b/fs/bio.c
index 4bd454fa844e..4d6d4b6c2bf1 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -43,7 +43,7 @@ static mempool_t *bio_split_pool __read_mostly;
43 * unsigned short 43 * unsigned short
44 */ 44 */
45#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } 45#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
46struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { 46static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
47 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), 47 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
48}; 48};
49#undef BV 49#undef BV
@@ -111,7 +111,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
111 if (!slab) 111 if (!slab)
112 goto out_unlock; 112 goto out_unlock;
113 113
114 printk("bio: create slab <%s> at %d\n", bslab->name, entry); 114 printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
115 bslab->slab = slab; 115 bslab->slab = slab;
116 bslab->slab_ref = 1; 116 bslab->slab_ref = 1;
117 bslab->slab_size = sz; 117 bslab->slab_size = sz;
@@ -1636,9 +1636,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1636 if (!bs->bio_pool) 1636 if (!bs->bio_pool)
1637 goto bad; 1637 goto bad;
1638 1638
1639 if (bioset_integrity_create(bs, pool_size))
1640 goto bad;
1641
1642 if (!biovec_create_pools(bs, pool_size)) 1639 if (!biovec_create_pools(bs, pool_size))
1643 return bs; 1640 return bs;
1644 1641
@@ -1656,12 +1653,10 @@ static void __init biovec_init_slabs(void)
1656 int size; 1653 int size;
1657 struct biovec_slab *bvs = bvec_slabs + i; 1654 struct biovec_slab *bvs = bvec_slabs + i;
1658 1655
1659#ifndef CONFIG_BLK_DEV_INTEGRITY
1660 if (bvs->nr_vecs <= BIO_INLINE_VECS) { 1656 if (bvs->nr_vecs <= BIO_INLINE_VECS) {
1661 bvs->slab = NULL; 1657 bvs->slab = NULL;
1662 continue; 1658 continue;
1663 } 1659 }
1664#endif
1665 1660
1666 size = bvs->nr_vecs * sizeof(struct bio_vec); 1661 size = bvs->nr_vecs * sizeof(struct bio_vec);
1667 bvs->slab = kmem_cache_create(bvs->name, size, 0, 1662 bvs->slab = kmem_cache_create(bvs->name, size, 0,
@@ -1684,6 +1679,9 @@ static int __init init_bio(void)
1684 if (!fs_bio_set) 1679 if (!fs_bio_set)
1685 panic("bio: can't allocate bios\n"); 1680 panic("bio: can't allocate bios\n");
1686 1681
1682 if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
1683 panic("bio: can't create integrity pool\n");
1684
1687 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES, 1685 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
1688 sizeof(struct bio_pair)); 1686 sizeof(struct bio_pair));
1689 if (!bio_split_pool) 1687 if (!bio_split_pool)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 889287019599..c1511c674f53 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -55,11 +55,13 @@ EXPORT_SYMBOL(I_BDEV);
55static void bdev_inode_switch_bdi(struct inode *inode, 55static void bdev_inode_switch_bdi(struct inode *inode,
56 struct backing_dev_info *dst) 56 struct backing_dev_info *dst)
57{ 57{
58 spin_lock(&inode_lock); 58 spin_lock(&inode_wb_list_lock);
59 spin_lock(&inode->i_lock);
59 inode->i_data.backing_dev_info = dst; 60 inode->i_data.backing_dev_info = dst;
60 if (inode->i_state & I_DIRTY) 61 if (inode->i_state & I_DIRTY)
61 list_move(&inode->i_wb_list, &dst->wb.b_dirty); 62 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
62 spin_unlock(&inode_lock); 63 spin_unlock(&inode->i_lock);
64 spin_unlock(&inode_wb_list_lock);
63} 65}
64 66
65static sector_t max_block(struct block_device *bdev) 67static sector_t max_block(struct block_device *bdev)
@@ -1087,6 +1089,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1087 if (!disk) 1089 if (!disk)
1088 goto out; 1090 goto out;
1089 1091
1092 disk_block_events(disk);
1090 mutex_lock_nested(&bdev->bd_mutex, for_part); 1093 mutex_lock_nested(&bdev->bd_mutex, for_part);
1091 if (!bdev->bd_openers) { 1094 if (!bdev->bd_openers) {
1092 bdev->bd_disk = disk; 1095 bdev->bd_disk = disk;
@@ -1108,10 +1111,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1108 */ 1111 */
1109 disk_put_part(bdev->bd_part); 1112 disk_put_part(bdev->bd_part);
1110 bdev->bd_part = NULL; 1113 bdev->bd_part = NULL;
1111 module_put(disk->fops->owner);
1112 put_disk(disk);
1113 bdev->bd_disk = NULL; 1114 bdev->bd_disk = NULL;
1114 mutex_unlock(&bdev->bd_mutex); 1115 mutex_unlock(&bdev->bd_mutex);
1116 disk_unblock_events(disk);
1117 module_put(disk->fops->owner);
1118 put_disk(disk);
1115 goto restart; 1119 goto restart;
1116 } 1120 }
1117 if (ret) 1121 if (ret)
@@ -1148,9 +1152,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1148 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); 1152 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1149 } 1153 }
1150 } else { 1154 } else {
1151 module_put(disk->fops->owner);
1152 put_disk(disk);
1153 disk = NULL;
1154 if (bdev->bd_contains == bdev) { 1155 if (bdev->bd_contains == bdev) {
1155 if (bdev->bd_disk->fops->open) { 1156 if (bdev->bd_disk->fops->open) {
1156 ret = bdev->bd_disk->fops->open(bdev, mode); 1157 ret = bdev->bd_disk->fops->open(bdev, mode);
@@ -1160,11 +1161,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1160 if (bdev->bd_invalidated) 1161 if (bdev->bd_invalidated)
1161 rescan_partitions(bdev->bd_disk, bdev); 1162 rescan_partitions(bdev->bd_disk, bdev);
1162 } 1163 }
1164 /* only one opener holds refs to the module and disk */
1165 module_put(disk->fops->owner);
1166 put_disk(disk);
1163 } 1167 }
1164 bdev->bd_openers++; 1168 bdev->bd_openers++;
1165 if (for_part) 1169 if (for_part)
1166 bdev->bd_part_count++; 1170 bdev->bd_part_count++;
1167 mutex_unlock(&bdev->bd_mutex); 1171 mutex_unlock(&bdev->bd_mutex);
1172 disk_unblock_events(disk);
1168 return 0; 1173 return 0;
1169 1174
1170 out_clear: 1175 out_clear:
@@ -1177,10 +1182,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1177 bdev->bd_contains = NULL; 1182 bdev->bd_contains = NULL;
1178 out_unlock_bdev: 1183 out_unlock_bdev:
1179 mutex_unlock(&bdev->bd_mutex); 1184 mutex_unlock(&bdev->bd_mutex);
1180 out: 1185 disk_unblock_events(disk);
1181 if (disk) 1186 module_put(disk->fops->owner);
1182 module_put(disk->fops->owner);
1183 put_disk(disk); 1187 put_disk(disk);
1188 out:
1184 bdput(bdev); 1189 bdput(bdev);
1185 1190
1186 return ret; 1191 return ret;
@@ -1446,14 +1451,13 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
1446 if (bdev_free) { 1451 if (bdev_free) {
1447 if (bdev->bd_write_holder) { 1452 if (bdev->bd_write_holder) {
1448 disk_unblock_events(bdev->bd_disk); 1453 disk_unblock_events(bdev->bd_disk);
1449 bdev->bd_write_holder = false;
1450 } else
1451 disk_check_events(bdev->bd_disk); 1454 disk_check_events(bdev->bd_disk);
1455 bdev->bd_write_holder = false;
1456 }
1452 } 1457 }
1453 1458
1454 mutex_unlock(&bdev->bd_mutex); 1459 mutex_unlock(&bdev->bd_mutex);
1455 } else 1460 }
1456 disk_check_events(bdev->bd_disk);
1457 1461
1458 return __blkdev_put(bdev, mode, 0); 1462 return __blkdev_put(bdev, mode, 0);
1459} 1463}
@@ -1527,7 +1531,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
1527static const struct address_space_operations def_blk_aops = { 1531static const struct address_space_operations def_blk_aops = {
1528 .readpage = blkdev_readpage, 1532 .readpage = blkdev_readpage,
1529 .writepage = blkdev_writepage, 1533 .writepage = blkdev_writepage,
1530 .sync_page = block_sync_page,
1531 .write_begin = blkdev_write_begin, 1534 .write_begin = blkdev_write_begin,
1532 .write_end = blkdev_write_end, 1535 .write_end = blkdev_write_end,
1533 .writepages = generic_writepages, 1536 .writepages = generic_writepages,
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9c949348510b..de34bfad9ec3 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -170,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
170 int ret; 170 int ret;
171 struct posix_acl *acl = NULL; 171 struct posix_acl *acl = NULL;
172 172
173 if (!is_owner_or_cap(dentry->d_inode)) 173 if (!inode_owner_or_capable(dentry->d_inode))
174 return -EPERM; 174 return -EPERM;
175 175
176 if (!IS_POSIXACL(dentry->d_inode)) 176 if (!IS_POSIXACL(dentry->d_inode))
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ccc991c542df..57c3bb2884ce 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -136,9 +136,8 @@ struct btrfs_inode {
136 * items we think we'll end up using, and reserved_extents is the number 136 * items we think we'll end up using, and reserved_extents is the number
137 * of extent items we've reserved metadata for. 137 * of extent items we've reserved metadata for.
138 */ 138 */
139 spinlock_t accounting_lock;
140 atomic_t outstanding_extents; 139 atomic_t outstanding_extents;
141 int reserved_extents; 140 atomic_t reserved_extents;
142 141
143 /* 142 /*
144 * ordered_data_close is set by truncate when a file that used 143 * ordered_data_close is set by truncate when a file that used
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4d2110eafe29..41d1d7c70e29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -340,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
340 340
341 WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); 341 WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
342 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 342 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
343 if (!cb)
344 return -ENOMEM;
343 atomic_set(&cb->pending_bios, 0); 345 atomic_set(&cb->pending_bios, 0);
344 cb->errors = 0; 346 cb->errors = 0;
345 cb->inode = inode; 347 cb->inode = inode;
@@ -354,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
354 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 356 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
355 357
356 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); 358 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
359 if(!bio) {
360 kfree(cb);
361 return -ENOMEM;
362 }
357 bio->bi_private = cb; 363 bio->bi_private = cb;
358 bio->bi_end_io = end_compressed_bio_write; 364 bio->bi_end_io = end_compressed_bio_write;
359 atomic_inc(&cb->pending_bios); 365 atomic_inc(&cb->pending_bios);
@@ -657,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
657 atomic_inc(&cb->pending_bios); 663 atomic_inc(&cb->pending_bios);
658 664
659 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { 665 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
660 btrfs_lookup_bio_sums(root, inode, comp_bio, 666 ret = btrfs_lookup_bio_sums(root, inode,
661 sums); 667 comp_bio, sums);
668 BUG_ON(ret);
662 } 669 }
663 sums += (comp_bio->bi_size + root->sectorsize - 1) / 670 sums += (comp_bio->bi_size + root->sectorsize - 1) /
664 root->sectorsize; 671 root->sectorsize;
@@ -683,8 +690,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
683 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); 690 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
684 BUG_ON(ret); 691 BUG_ON(ret);
685 692
686 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 693 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
687 btrfs_lookup_bio_sums(root, inode, comp_bio, sums); 694 ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
695 BUG_ON(ret);
696 }
688 697
689 ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); 698 ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
690 BUG_ON(ret); 699 BUG_ON(ret);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b5baff0dccfe..84d7ca1fe0ba 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -147,10 +147,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
147struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 147struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
148{ 148{
149 struct extent_buffer *eb; 149 struct extent_buffer *eb;
150 spin_lock(&root->node_lock); 150
151 eb = root->node; 151 rcu_read_lock();
152 eb = rcu_dereference(root->node);
152 extent_buffer_get(eb); 153 extent_buffer_get(eb);
153 spin_unlock(&root->node_lock); 154 rcu_read_unlock();
154 return eb; 155 return eb;
155} 156}
156 157
@@ -165,14 +166,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
165 while (1) { 166 while (1) {
166 eb = btrfs_root_node(root); 167 eb = btrfs_root_node(root);
167 btrfs_tree_lock(eb); 168 btrfs_tree_lock(eb);
168 169 if (eb == root->node)
169 spin_lock(&root->node_lock);
170 if (eb == root->node) {
171 spin_unlock(&root->node_lock);
172 break; 170 break;
173 }
174 spin_unlock(&root->node_lock);
175
176 btrfs_tree_unlock(eb); 171 btrfs_tree_unlock(eb);
177 free_extent_buffer(eb); 172 free_extent_buffer(eb);
178 } 173 }
@@ -458,10 +453,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
458 else 453 else
459 parent_start = 0; 454 parent_start = 0;
460 455
461 spin_lock(&root->node_lock);
462 root->node = cow;
463 extent_buffer_get(cow); 456 extent_buffer_get(cow);
464 spin_unlock(&root->node_lock); 457 rcu_assign_pointer(root->node, cow);
465 458
466 btrfs_free_tree_block(trans, root, buf, parent_start, 459 btrfs_free_tree_block(trans, root, buf, parent_start,
467 last_ref); 460 last_ref);
@@ -542,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
542 535
543 ret = __btrfs_cow_block(trans, root, buf, parent, 536 ret = __btrfs_cow_block(trans, root, buf, parent,
544 parent_slot, cow_ret, search_start, 0); 537 parent_slot, cow_ret, search_start, 0);
538
539 trace_btrfs_cow_block(root, buf, *cow_ret);
540
545 return ret; 541 return ret;
546} 542}
547 543
@@ -686,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
686 if (!cur) { 682 if (!cur) {
687 cur = read_tree_block(root, blocknr, 683 cur = read_tree_block(root, blocknr,
688 blocksize, gen); 684 blocksize, gen);
685 if (!cur)
686 return -EIO;
689 } else if (!uptodate) { 687 } else if (!uptodate) {
690 btrfs_read_buffer(cur, gen); 688 btrfs_read_buffer(cur, gen);
691 } 689 }
@@ -732,122 +730,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
732 return btrfs_item_offset_nr(leaf, nr - 1); 730 return btrfs_item_offset_nr(leaf, nr - 1);
733} 731}
734 732
735/*
736 * extra debugging checks to make sure all the items in a key are
737 * well formed and in the proper order
738 */
739static int check_node(struct btrfs_root *root, struct btrfs_path *path,
740 int level)
741{
742 struct extent_buffer *parent = NULL;
743 struct extent_buffer *node = path->nodes[level];
744 struct btrfs_disk_key parent_key;
745 struct btrfs_disk_key node_key;
746 int parent_slot;
747 int slot;
748 struct btrfs_key cpukey;
749 u32 nritems = btrfs_header_nritems(node);
750
751 if (path->nodes[level + 1])
752 parent = path->nodes[level + 1];
753
754 slot = path->slots[level];
755 BUG_ON(nritems == 0);
756 if (parent) {
757 parent_slot = path->slots[level + 1];
758 btrfs_node_key(parent, &parent_key, parent_slot);
759 btrfs_node_key(node, &node_key, 0);
760 BUG_ON(memcmp(&parent_key, &node_key,
761 sizeof(struct btrfs_disk_key)));
762 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
763 btrfs_header_bytenr(node));
764 }
765 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
766 if (slot != 0) {
767 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
768 btrfs_node_key(node, &node_key, slot);
769 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
770 }
771 if (slot < nritems - 1) {
772 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
773 btrfs_node_key(node, &node_key, slot);
774 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
775 }
776 return 0;
777}
778
779/*
780 * extra checking to make sure all the items in a leaf are
781 * well formed and in the proper order
782 */
783static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
784 int level)
785{
786 struct extent_buffer *leaf = path->nodes[level];
787 struct extent_buffer *parent = NULL;
788 int parent_slot;
789 struct btrfs_key cpukey;
790 struct btrfs_disk_key parent_key;
791 struct btrfs_disk_key leaf_key;
792 int slot = path->slots[0];
793
794 u32 nritems = btrfs_header_nritems(leaf);
795
796 if (path->nodes[level + 1])
797 parent = path->nodes[level + 1];
798
799 if (nritems == 0)
800 return 0;
801
802 if (parent) {
803 parent_slot = path->slots[level + 1];
804 btrfs_node_key(parent, &parent_key, parent_slot);
805 btrfs_item_key(leaf, &leaf_key, 0);
806
807 BUG_ON(memcmp(&parent_key, &leaf_key,
808 sizeof(struct btrfs_disk_key)));
809 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
810 btrfs_header_bytenr(leaf));
811 }
812 if (slot != 0 && slot < nritems - 1) {
813 btrfs_item_key(leaf, &leaf_key, slot);
814 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
815 if (comp_keys(&leaf_key, &cpukey) <= 0) {
816 btrfs_print_leaf(root, leaf);
817 printk(KERN_CRIT "slot %d offset bad key\n", slot);
818 BUG_ON(1);
819 }
820 if (btrfs_item_offset_nr(leaf, slot - 1) !=
821 btrfs_item_end_nr(leaf, slot)) {
822 btrfs_print_leaf(root, leaf);
823 printk(KERN_CRIT "slot %d offset bad\n", slot);
824 BUG_ON(1);
825 }
826 }
827 if (slot < nritems - 1) {
828 btrfs_item_key(leaf, &leaf_key, slot);
829 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
830 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
831 if (btrfs_item_offset_nr(leaf, slot) !=
832 btrfs_item_end_nr(leaf, slot + 1)) {
833 btrfs_print_leaf(root, leaf);
834 printk(KERN_CRIT "slot %d offset bad\n", slot);
835 BUG_ON(1);
836 }
837 }
838 BUG_ON(btrfs_item_offset_nr(leaf, 0) +
839 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
840 return 0;
841}
842
843static noinline int check_block(struct btrfs_root *root,
844 struct btrfs_path *path, int level)
845{
846 return 0;
847 if (level == 0)
848 return check_leaf(root, path, level);
849 return check_node(root, path, level);
850}
851 733
852/* 734/*
853 * search for key in the extent_buffer. The items start at offset p, 735 * search for key in the extent_buffer. The items start at offset p,
@@ -1046,9 +928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1046 goto enospc; 928 goto enospc;
1047 } 929 }
1048 930
1049 spin_lock(&root->node_lock); 931 rcu_assign_pointer(root->node, child);
1050 root->node = child;
1051 spin_unlock(&root->node_lock);
1052 932
1053 add_root_to_dirty_list(root); 933 add_root_to_dirty_list(root);
1054 btrfs_tree_unlock(child); 934 btrfs_tree_unlock(child);
@@ -1188,7 +1068,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1188 } 1068 }
1189 } 1069 }
1190 /* double check we haven't messed things up */ 1070 /* double check we haven't messed things up */
1191 check_block(root, path, level);
1192 if (orig_ptr != 1071 if (orig_ptr !=
1193 btrfs_node_blockptr(path->nodes[level], path->slots[level])) 1072 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1194 BUG(); 1073 BUG();
@@ -1798,12 +1677,6 @@ cow_done:
1798 if (!cow) 1677 if (!cow)
1799 btrfs_unlock_up_safe(p, level + 1); 1678 btrfs_unlock_up_safe(p, level + 1);
1800 1679
1801 ret = check_block(root, p, level);
1802 if (ret) {
1803 ret = -1;
1804 goto done;
1805 }
1806
1807 ret = bin_search(b, key, level, &slot); 1680 ret = bin_search(b, key, level, &slot);
1808 1681
1809 if (level != 0) { 1682 if (level != 0) {
@@ -2130,10 +2003,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2130 2003
2131 btrfs_mark_buffer_dirty(c); 2004 btrfs_mark_buffer_dirty(c);
2132 2005
2133 spin_lock(&root->node_lock);
2134 old = root->node; 2006 old = root->node;
2135 root->node = c; 2007 rcu_assign_pointer(root->node, c);
2136 spin_unlock(&root->node_lock);
2137 2008
2138 /* the super has an extra ref to root->node */ 2009 /* the super has an extra ref to root->node */
2139 free_extent_buffer(old); 2010 free_extent_buffer(old);
@@ -3840,7 +3711,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3840 unsigned long ptr; 3711 unsigned long ptr;
3841 3712
3842 path = btrfs_alloc_path(); 3713 path = btrfs_alloc_path();
3843 BUG_ON(!path); 3714 if (!path)
3715 return -ENOMEM;
3844 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 3716 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3845 if (!ret) { 3717 if (!ret) {
3846 leaf = path->nodes[0]; 3718 leaf = path->nodes[0];
@@ -4217,6 +4089,7 @@ find_next_key:
4217 } 4089 }
4218 btrfs_set_path_blocking(path); 4090 btrfs_set_path_blocking(path);
4219 cur = read_node_slot(root, cur, slot); 4091 cur = read_node_slot(root, cur, slot);
4092 BUG_ON(!cur);
4220 4093
4221 btrfs_tree_lock(cur); 4094 btrfs_tree_lock(cur);
4222 4095
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7f78cc78fdd0..d47ce8307854 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
28#include <linux/wait.h> 28#include <linux/wait.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/kobject.h> 30#include <linux/kobject.h>
31#include <trace/events/btrfs.h>
31#include <asm/kmap_types.h> 32#include <asm/kmap_types.h>
32#include "extent_io.h" 33#include "extent_io.h"
33#include "extent_map.h" 34#include "extent_map.h"
@@ -40,6 +41,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
40extern struct kmem_cache *btrfs_transaction_cachep; 41extern struct kmem_cache *btrfs_transaction_cachep;
41extern struct kmem_cache *btrfs_bit_radix_cachep; 42extern struct kmem_cache *btrfs_bit_radix_cachep;
42extern struct kmem_cache *btrfs_path_cachep; 43extern struct kmem_cache *btrfs_path_cachep;
44extern struct kmem_cache *btrfs_free_space_cachep;
43struct btrfs_ordered_sum; 45struct btrfs_ordered_sum;
44 46
45#define BTRFS_MAGIC "_BHRfS_M" 47#define BTRFS_MAGIC "_BHRfS_M"
@@ -782,9 +784,6 @@ struct btrfs_free_cluster {
782 /* first extent starting offset */ 784 /* first extent starting offset */
783 u64 window_start; 785 u64 window_start;
784 786
785 /* if this cluster simply points at a bitmap in the block group */
786 bool points_to_bitmap;
787
788 struct btrfs_block_group_cache *block_group; 787 struct btrfs_block_group_cache *block_group;
789 /* 788 /*
790 * when a cluster is allocated from a block group, we put the 789 * when a cluster is allocated from a block group, we put the
@@ -1283,6 +1282,7 @@ struct btrfs_root {
1283#define BTRFS_INODE_NODUMP (1 << 8) 1282#define BTRFS_INODE_NODUMP (1 << 8)
1284#define BTRFS_INODE_NOATIME (1 << 9) 1283#define BTRFS_INODE_NOATIME (1 << 9)
1285#define BTRFS_INODE_DIRSYNC (1 << 10) 1284#define BTRFS_INODE_DIRSYNC (1 << 10)
1285#define BTRFS_INODE_COMPRESS (1 << 11)
1286 1286
1287/* some macros to generate set/get funcs for the struct fields. This 1287/* some macros to generate set/get funcs for the struct fields. This
1288 * assumes there is a lefoo_to_cpu for every type, so lets make a simple 1288 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
@@ -2157,6 +2157,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
2157 u64 root_objectid, u64 owner, u64 offset); 2157 u64 root_objectid, u64 owner, u64 offset);
2158 2158
2159int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 2159int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
2160int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
2161 u64 num_bytes, int reserve, int sinfo);
2160int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 2162int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
2161 struct btrfs_root *root); 2163 struct btrfs_root *root);
2162int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 2164int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2227,10 +2229,12 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
2227int btrfs_error_unpin_extent_range(struct btrfs_root *root, 2229int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2228 u64 start, u64 end); 2230 u64 start, u64 end);
2229int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2231int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2230 u64 num_bytes); 2232 u64 num_bytes, u64 *actual_bytes);
2231int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 2233int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root, u64 type); 2234 struct btrfs_root *root, u64 type);
2235int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
2233 2236
2237int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
2234/* ctree.c */ 2238/* ctree.c */
2235int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2239int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2236 int level, int *slot); 2240 int level, int *slot);
@@ -2392,6 +2396,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
2392 struct btrfs_path *path, u64 dir, 2396 struct btrfs_path *path, u64 dir,
2393 const char *name, u16 name_len, 2397 const char *name, u16 name_len,
2394 int mod); 2398 int mod);
2399int verify_dir_item(struct btrfs_root *root,
2400 struct extent_buffer *leaf,
2401 struct btrfs_dir_item *dir_item);
2395 2402
2396/* orphan.c */ 2403/* orphan.c */
2397int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, 2404int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -2528,7 +2535,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
2528 struct inode *inode); 2535 struct inode *inode);
2529int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); 2536int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
2530int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); 2537int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2531void btrfs_orphan_cleanup(struct btrfs_root *root); 2538int btrfs_orphan_cleanup(struct btrfs_root *root);
2532void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, 2539void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
2533 struct btrfs_pending_snapshot *pending, 2540 struct btrfs_pending_snapshot *pending,
2534 u64 *bytes_to_reserve); 2541 u64 *bytes_to_reserve);
@@ -2536,7 +2543,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
2536 struct btrfs_pending_snapshot *pending); 2543 struct btrfs_pending_snapshot *pending);
2537void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, 2544void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2538 struct btrfs_root *root); 2545 struct btrfs_root *root);
2539int btrfs_cont_expand(struct inode *inode, loff_t size); 2546int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
2540int btrfs_invalidate_inodes(struct btrfs_root *root); 2547int btrfs_invalidate_inodes(struct btrfs_root *root);
2541void btrfs_add_delayed_iput(struct inode *inode); 2548void btrfs_add_delayed_iput(struct inode *inode);
2542void btrfs_run_delayed_iputs(struct btrfs_root *root); 2549void btrfs_run_delayed_iputs(struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e807b143b857..bce28f653899 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
483 INIT_LIST_HEAD(&head_ref->cluster); 483 INIT_LIST_HEAD(&head_ref->cluster);
484 mutex_init(&head_ref->mutex); 484 mutex_init(&head_ref->mutex);
485 485
486 trace_btrfs_delayed_ref_head(ref, head_ref, action);
487
486 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 488 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
487 489
488 if (existing) { 490 if (existing) {
@@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
537 } 539 }
538 full_ref->level = level; 540 full_ref->level = level;
539 541
542 trace_btrfs_delayed_tree_ref(ref, full_ref, action);
543
540 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 544 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
541 545
542 if (existing) { 546 if (existing) {
@@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
591 full_ref->objectid = owner; 595 full_ref->objectid = owner;
592 full_ref->offset = offset; 596 full_ref->offset = offset;
593 597
598 trace_btrfs_delayed_data_ref(ref, full_ref, action);
599
594 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 600 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
595 601
596 if (existing) { 602 if (existing) {
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f0cad5ae5be7..c62f02f6ae69 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -151,7 +151,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
151 ret = PTR_ERR(dir_item); 151 ret = PTR_ERR(dir_item);
152 if (ret == -EEXIST) 152 if (ret == -EEXIST)
153 goto second_insert; 153 goto second_insert;
154 goto out; 154 goto out_free;
155 } 155 }
156 156
157 leaf = path->nodes[0]; 157 leaf = path->nodes[0];
@@ -170,7 +170,7 @@ second_insert:
170 /* FIXME, use some real flag for selecting the extra index */ 170 /* FIXME, use some real flag for selecting the extra index */
171 if (root == root->fs_info->tree_root) { 171 if (root == root->fs_info->tree_root) {
172 ret = 0; 172 ret = 0;
173 goto out; 173 goto out_free;
174 } 174 }
175 btrfs_release_path(root, path); 175 btrfs_release_path(root, path);
176 176
@@ -180,7 +180,7 @@ second_insert:
180 name, name_len); 180 name, name_len);
181 if (IS_ERR(dir_item)) { 181 if (IS_ERR(dir_item)) {
182 ret2 = PTR_ERR(dir_item); 182 ret2 = PTR_ERR(dir_item);
183 goto out; 183 goto out_free;
184 } 184 }
185 leaf = path->nodes[0]; 185 leaf = path->nodes[0];
186 btrfs_cpu_key_to_disk(&disk_key, location); 186 btrfs_cpu_key_to_disk(&disk_key, location);
@@ -192,7 +192,9 @@ second_insert:
192 name_ptr = (unsigned long)(dir_item + 1); 192 name_ptr = (unsigned long)(dir_item + 1);
193 write_extent_buffer(leaf, name, name_ptr, name_len); 193 write_extent_buffer(leaf, name, name_ptr, name_len);
194 btrfs_mark_buffer_dirty(leaf); 194 btrfs_mark_buffer_dirty(leaf);
195out: 195
196out_free:
197
196 btrfs_free_path(path); 198 btrfs_free_path(path);
197 if (ret) 199 if (ret)
198 return ret; 200 return ret;
@@ -377,6 +379,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
377 379
378 leaf = path->nodes[0]; 380 leaf = path->nodes[0];
379 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); 381 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
382 if (verify_dir_item(root, leaf, dir_item))
383 return NULL;
384
380 total_len = btrfs_item_size_nr(leaf, path->slots[0]); 385 total_len = btrfs_item_size_nr(leaf, path->slots[0]);
381 while (cur < total_len) { 386 while (cur < total_len) {
382 this_len = sizeof(*dir_item) + 387 this_len = sizeof(*dir_item) +
@@ -429,3 +434,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
429 } 434 }
430 return ret; 435 return ret;
431} 436}
437
438int verify_dir_item(struct btrfs_root *root,
439 struct extent_buffer *leaf,
440 struct btrfs_dir_item *dir_item)
441{
442 u16 namelen = BTRFS_NAME_LEN;
443 u8 type = btrfs_dir_type(leaf, dir_item);
444
445 if (type >= BTRFS_FT_MAX) {
446 printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
447 (int)type);
448 return 1;
449 }
450
451 if (type == BTRFS_FT_XATTR)
452 namelen = XATTR_NAME_MAX;
453
454 if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
455 printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n",
456 (unsigned)btrfs_dir_data_len(leaf, dir_item));
457 return 1;
458 }
459
460 /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
461 if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
462 printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
463 (unsigned)btrfs_dir_data_len(leaf, dir_item));
464 return 1;
465 }
466
467 return 0;
468}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 100b07f021b4..d7a7315bd031 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <asm/unaligned.h>
32#include "compat.h" 33#include "compat.h"
33#include "ctree.h" 34#include "ctree.h"
34#include "disk-io.h" 35#include "disk-io.h"
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
198 199
199void btrfs_csum_final(u32 crc, char *result) 200void btrfs_csum_final(u32 crc, char *result)
200{ 201{
201 *(__le32 *)result = ~cpu_to_le32(crc); 202 put_unaligned_le32(~crc, result);
202} 203}
203 204
204/* 205/*
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
323 int num_copies = 0; 324 int num_copies = 0;
324 int mirror_num = 0; 325 int mirror_num = 0;
325 326
327 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
326 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 328 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
327 while (1) { 329 while (1) {
328 ret = read_extent_buffer_pages(io_tree, eb, start, 1, 330 ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
331 !verify_parent_transid(io_tree, eb, parent_transid)) 333 !verify_parent_transid(io_tree, eb, parent_transid))
332 return ret; 334 return ret;
333 335
336 /*
337 * This buffer's crc is fine, but its contents are corrupted, so
338 * there is no reason to read the other copies, they won't be
339 * any less wrong.
340 */
341 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
342 return ret;
343
334 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 344 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
335 eb->start, eb->len); 345 eb->start, eb->len);
336 if (num_copies == 1) 346 if (num_copies == 1)
@@ -419,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
419 return ret; 429 return ret;
420} 430}
421 431
432#define CORRUPT(reason, eb, root, slot) \
433 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
434 "root=%llu, slot=%d\n", reason, \
435 (unsigned long long)btrfs_header_bytenr(eb), \
436 (unsigned long long)root->objectid, slot)
437
438static noinline int check_leaf(struct btrfs_root *root,
439 struct extent_buffer *leaf)
440{
441 struct btrfs_key key;
442 struct btrfs_key leaf_key;
443 u32 nritems = btrfs_header_nritems(leaf);
444 int slot;
445
446 if (nritems == 0)
447 return 0;
448
449 /* Check the 0 item */
450 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
451 BTRFS_LEAF_DATA_SIZE(root)) {
452 CORRUPT("invalid item offset size pair", leaf, root, 0);
453 return -EIO;
454 }
455
456 /*
457 * Check to make sure each items keys are in the correct order and their
458 * offsets make sense. We only have to loop through nritems-1 because
459 * we check the current slot against the next slot, which verifies the
460 * next slot's offset+size makes sense and that the current's slot
461 * offset is correct.
462 */
463 for (slot = 0; slot < nritems - 1; slot++) {
464 btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
465 btrfs_item_key_to_cpu(leaf, &key, slot + 1);
466
467 /* Make sure the keys are in the right order */
468 if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
469 CORRUPT("bad key order", leaf, root, slot);
470 return -EIO;
471 }
472
473 /*
474 * Make sure the offset and ends are right, remember that the
475 * item data starts at the end of the leaf and grows towards the
476 * front.
477 */
478 if (btrfs_item_offset_nr(leaf, slot) !=
479 btrfs_item_end_nr(leaf, slot + 1)) {
480 CORRUPT("slot offset bad", leaf, root, slot);
481 return -EIO;
482 }
483
484 /*
485 * Check to make sure that we don't point outside of the leaf,
486 * just incase all the items are consistent to eachother, but
487 * all point outside of the leaf.
488 */
489 if (btrfs_item_end_nr(leaf, slot) >
490 BTRFS_LEAF_DATA_SIZE(root)) {
491 CORRUPT("slot end outside of leaf", leaf, root, slot);
492 return -EIO;
493 }
494 }
495
496 return 0;
497}
498
422#ifdef CONFIG_DEBUG_LOCK_ALLOC 499#ifdef CONFIG_DEBUG_LOCK_ALLOC
423void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) 500void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
424{ 501{
@@ -485,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
485 btrfs_set_buffer_lockdep_class(eb, found_level); 562 btrfs_set_buffer_lockdep_class(eb, found_level);
486 563
487 ret = csum_tree_block(root, eb, 1); 564 ret = csum_tree_block(root, eb, 1);
488 if (ret) 565 if (ret) {
566 ret = -EIO;
567 goto err;
568 }
569
570 /*
571 * If this is a leaf block and it is corrupt, set the corrupt bit so
572 * that we don't try and read the other copies of this block, just
573 * return -EIO.
574 */
575 if (found_level == 0 && check_leaf(root, eb)) {
576 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
489 ret = -EIO; 577 ret = -EIO;
578 }
490 579
491 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 580 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
492 end = eb->start + end - 1; 581 end = eb->start + end - 1;
@@ -847,7 +936,6 @@ static const struct address_space_operations btree_aops = {
847 .writepages = btree_writepages, 936 .writepages = btree_writepages,
848 .releasepage = btree_releasepage, 937 .releasepage = btree_releasepage,
849 .invalidatepage = btree_invalidatepage, 938 .invalidatepage = btree_invalidatepage,
850 .sync_page = block_sync_page,
851#ifdef CONFIG_MIGRATION 939#ifdef CONFIG_MIGRATION
852 .migratepage = btree_migratepage, 940 .migratepage = btree_migratepage,
853#endif 941#endif
@@ -1160,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1160 root, fs_info, location->objectid); 1248 root, fs_info, location->objectid);
1161 1249
1162 path = btrfs_alloc_path(); 1250 path = btrfs_alloc_path();
1163 BUG_ON(!path); 1251 if (!path) {
1252 kfree(root);
1253 return ERR_PTR(-ENOMEM);
1254 }
1164 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); 1255 ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
1165 if (ret == 0) { 1256 if (ret == 0) {
1166 l = path->nodes[0]; 1257 l = path->nodes[0];
@@ -1331,82 +1422,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1331} 1422}
1332 1423
1333/* 1424/*
1334 * this unplugs every device on the box, and it is only used when page
1335 * is null
1336 */
1337static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1338{
1339 struct btrfs_device *device;
1340 struct btrfs_fs_info *info;
1341
1342 info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1343 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1344 if (!device->bdev)
1345 continue;
1346
1347 bdi = blk_get_backing_dev_info(device->bdev);
1348 if (bdi->unplug_io_fn)
1349 bdi->unplug_io_fn(bdi, page);
1350 }
1351}
1352
1353static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1354{
1355 struct inode *inode;
1356 struct extent_map_tree *em_tree;
1357 struct extent_map *em;
1358 struct address_space *mapping;
1359 u64 offset;
1360
1361 /* the generic O_DIRECT read code does this */
1362 if (1 || !page) {
1363 __unplug_io_fn(bdi, page);
1364 return;
1365 }
1366
1367 /*
1368 * page->mapping may change at any time. Get a consistent copy
1369 * and use that for everything below
1370 */
1371 smp_mb();
1372 mapping = page->mapping;
1373 if (!mapping)
1374 return;
1375
1376 inode = mapping->host;
1377
1378 /*
1379 * don't do the expensive searching for a small number of
1380 * devices
1381 */
1382 if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
1383 __unplug_io_fn(bdi, page);
1384 return;
1385 }
1386
1387 offset = page_offset(page);
1388
1389 em_tree = &BTRFS_I(inode)->extent_tree;
1390 read_lock(&em_tree->lock);
1391 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
1392 read_unlock(&em_tree->lock);
1393 if (!em) {
1394 __unplug_io_fn(bdi, page);
1395 return;
1396 }
1397
1398 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1399 free_extent_map(em);
1400 __unplug_io_fn(bdi, page);
1401 return;
1402 }
1403 offset = offset - em->start;
1404 btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
1405 em->block_start + offset, page);
1406 free_extent_map(em);
1407}
1408
1409/*
1410 * If this fails, caller must call bdi_destroy() to get rid of the 1425 * If this fails, caller must call bdi_destroy() to get rid of the
1411 * bdi again. 1426 * bdi again.
1412 */ 1427 */
@@ -1420,8 +1435,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1420 return err; 1435 return err;
1421 1436
1422 bdi->ra_pages = default_backing_dev_info.ra_pages; 1437 bdi->ra_pages = default_backing_dev_info.ra_pages;
1423 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1424 bdi->unplug_io_data = info;
1425 bdi->congested_fn = btrfs_congested_fn; 1438 bdi->congested_fn = btrfs_congested_fn;
1426 bdi->congested_data = info; 1439 bdi->congested_data = info;
1427 return 0; 1440 return 0;
@@ -1632,6 +1645,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1632 goto fail_bdi; 1645 goto fail_bdi;
1633 } 1646 }
1634 1647
1648 fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
1649
1635 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1650 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1636 INIT_LIST_HEAD(&fs_info->trans_list); 1651 INIT_LIST_HEAD(&fs_info->trans_list);
1637 INIT_LIST_HEAD(&fs_info->dead_roots); 1652 INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -1762,6 +1777,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1762 1777
1763 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 1778 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1764 1779
1780 /*
1781 * In the long term, we'll store the compression type in the super
1782 * block, and it'll be used for per file compression control.
1783 */
1784 fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
1785
1765 ret = btrfs_parse_options(tree_root, options); 1786 ret = btrfs_parse_options(tree_root, options);
1766 if (ret) { 1787 if (ret) {
1767 err = ret; 1788 err = ret;
@@ -1967,6 +1988,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1967 fs_info->metadata_alloc_profile = (u64)-1; 1988 fs_info->metadata_alloc_profile = (u64)-1;
1968 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; 1989 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
1969 1990
1991 ret = btrfs_init_space_info(fs_info);
1992 if (ret) {
1993 printk(KERN_ERR "Failed to initial space info: %d\n", ret);
1994 goto fail_block_groups;
1995 }
1996
1970 ret = btrfs_read_block_groups(extent_root); 1997 ret = btrfs_read_block_groups(extent_root);
1971 if (ret) { 1998 if (ret) {
1972 printk(KERN_ERR "Failed to read block groups: %d\n", ret); 1999 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@ -2058,9 +2085,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2058 2085
2059 if (!(sb->s_flags & MS_RDONLY)) { 2086 if (!(sb->s_flags & MS_RDONLY)) {
2060 down_read(&fs_info->cleanup_work_sem); 2087 down_read(&fs_info->cleanup_work_sem);
2061 btrfs_orphan_cleanup(fs_info->fs_root); 2088 err = btrfs_orphan_cleanup(fs_info->fs_root);
2062 btrfs_orphan_cleanup(fs_info->tree_root); 2089 if (!err)
2090 err = btrfs_orphan_cleanup(fs_info->tree_root);
2063 up_read(&fs_info->cleanup_work_sem); 2091 up_read(&fs_info->cleanup_work_sem);
2092 if (err) {
2093 close_ctree(tree_root);
2094 return ERR_PTR(err);
2095 }
2064 } 2096 }
2065 2097
2066 return tree_root; 2098 return tree_root;
@@ -2435,8 +2467,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2435 2467
2436 root_objectid = gang[ret - 1]->root_key.objectid + 1; 2468 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2437 for (i = 0; i < ret; i++) { 2469 for (i = 0; i < ret; i++) {
2470 int err;
2471
2438 root_objectid = gang[i]->root_key.objectid; 2472 root_objectid = gang[i]->root_key.objectid;
2439 btrfs_orphan_cleanup(gang[i]); 2473 err = btrfs_orphan_cleanup(gang[i]);
2474 if (err)
2475 return err;
2440 } 2476 }
2441 root_objectid++; 2477 root_objectid++;
2442 } 2478 }
@@ -2947,7 +2983,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
2947 break; 2983 break;
2948 2984
2949 /* opt_discard */ 2985 /* opt_discard */
2950 ret = btrfs_error_discard_extent(root, start, end + 1 - start); 2986 if (btrfs_test_opt(root, DISCARD))
2987 ret = btrfs_error_discard_extent(root, start,
2988 end + 1 - start,
2989 NULL);
2951 2990
2952 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2991 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2953 btrfs_error_unpin_extent_range(root, start, end); 2992 btrfs_error_unpin_extent_range(root, start, end);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7b3089b5c2df..f619c3cb13b7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -36,8 +36,6 @@
36static int update_block_group(struct btrfs_trans_handle *trans, 36static int update_block_group(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 37 struct btrfs_root *root,
38 u64 bytenr, u64 num_bytes, int alloc); 38 u64 bytenr, u64 num_bytes, int alloc);
39static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
40 u64 num_bytes, int reserve, int sinfo);
41static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 39static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, 40 struct btrfs_root *root,
43 u64 bytenr, u64 num_bytes, u64 parent, 41 u64 bytenr, u64 num_bytes, u64 parent,
@@ -442,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
442 * allocate blocks for the tree root we can't do the fast caching since 440 * allocate blocks for the tree root we can't do the fast caching since
443 * we likely hold important locks. 441 * we likely hold important locks.
444 */ 442 */
445 if (!trans->transaction->in_commit && 443 if (trans && (!trans->transaction->in_commit) &&
446 (root && root != root->fs_info->tree_root)) { 444 (root && root != root->fs_info->tree_root)) {
447 spin_lock(&cache->lock); 445 spin_lock(&cache->lock);
448 if (cache->cached != BTRFS_CACHE_NO) { 446 if (cache->cached != BTRFS_CACHE_NO) {
@@ -471,7 +469,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
471 if (load_cache_only) 469 if (load_cache_only)
472 return 0; 470 return 0;
473 471
474 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); 472 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
475 BUG_ON(!caching_ctl); 473 BUG_ON(!caching_ctl);
476 474
477 INIT_LIST_HEAD(&caching_ctl->list); 475 INIT_LIST_HEAD(&caching_ctl->list);
@@ -1740,39 +1738,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1740 return ret; 1738 return ret;
1741} 1739}
1742 1740
1743static void btrfs_issue_discard(struct block_device *bdev, 1741static int btrfs_issue_discard(struct block_device *bdev,
1744 u64 start, u64 len) 1742 u64 start, u64 len)
1745{ 1743{
1746 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); 1744 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
1747} 1745}
1748 1746
1749static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1747static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1750 u64 num_bytes) 1748 u64 num_bytes, u64 *actual_bytes)
1751{ 1749{
1752 int ret; 1750 int ret;
1753 u64 map_length = num_bytes; 1751 u64 discarded_bytes = 0;
1754 struct btrfs_multi_bio *multi = NULL; 1752 struct btrfs_multi_bio *multi = NULL;
1755 1753
1756 if (!btrfs_test_opt(root, DISCARD))
1757 return 0;
1758 1754
1759 /* Tell the block device(s) that the sectors can be discarded */ 1755 /* Tell the block device(s) that the sectors can be discarded */
1760 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1756 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
1761 bytenr, &map_length, &multi, 0); 1757 bytenr, &num_bytes, &multi, 0);
1762 if (!ret) { 1758 if (!ret) {
1763 struct btrfs_bio_stripe *stripe = multi->stripes; 1759 struct btrfs_bio_stripe *stripe = multi->stripes;
1764 int i; 1760 int i;
1765 1761
1766 if (map_length > num_bytes)
1767 map_length = num_bytes;
1768 1762
1769 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1763 for (i = 0; i < multi->num_stripes; i++, stripe++) {
1770 btrfs_issue_discard(stripe->dev->bdev, 1764 ret = btrfs_issue_discard(stripe->dev->bdev,
1771 stripe->physical, 1765 stripe->physical,
1772 map_length); 1766 stripe->length);
1767 if (!ret)
1768 discarded_bytes += stripe->length;
1769 else if (ret != -EOPNOTSUPP)
1770 break;
1773 } 1771 }
1774 kfree(multi); 1772 kfree(multi);
1775 } 1773 }
1774 if (discarded_bytes && ret == -EOPNOTSUPP)
1775 ret = 0;
1776
1777 if (actual_bytes)
1778 *actual_bytes = discarded_bytes;
1779
1776 1780
1777 return ret; 1781 return ret;
1778} 1782}
@@ -3996,6 +4000,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3996 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 4000 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3997 u64 to_reserve; 4001 u64 to_reserve;
3998 int nr_extents; 4002 int nr_extents;
4003 int reserved_extents;
3999 int ret; 4004 int ret;
4000 4005
4001 if (btrfs_transaction_in_commit(root->fs_info)) 4006 if (btrfs_transaction_in_commit(root->fs_info))
@@ -4003,25 +4008,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4003 4008
4004 num_bytes = ALIGN(num_bytes, root->sectorsize); 4009 num_bytes = ALIGN(num_bytes, root->sectorsize);
4005 4010
4006 spin_lock(&BTRFS_I(inode)->accounting_lock);
4007 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 4011 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
4008 if (nr_extents > BTRFS_I(inode)->reserved_extents) { 4012 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
4009 nr_extents -= BTRFS_I(inode)->reserved_extents; 4013
4014 if (nr_extents > reserved_extents) {
4015 nr_extents -= reserved_extents;
4010 to_reserve = calc_trans_metadata_size(root, nr_extents); 4016 to_reserve = calc_trans_metadata_size(root, nr_extents);
4011 } else { 4017 } else {
4012 nr_extents = 0; 4018 nr_extents = 0;
4013 to_reserve = 0; 4019 to_reserve = 0;
4014 } 4020 }
4015 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4021
4016 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4022 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4023 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4018 if (ret) 4024 if (ret)
4019 return ret; 4025 return ret;
4020 4026
4021 spin_lock(&BTRFS_I(inode)->accounting_lock); 4027 atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
4022 BTRFS_I(inode)->reserved_extents += nr_extents;
4023 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 4028 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
4024 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4025 4029
4026 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4030 block_rsv_add_bytes(block_rsv, to_reserve, 1);
4027 4031
@@ -4036,20 +4040,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4036 struct btrfs_root *root = BTRFS_I(inode)->root; 4040 struct btrfs_root *root = BTRFS_I(inode)->root;
4037 u64 to_free; 4041 u64 to_free;
4038 int nr_extents; 4042 int nr_extents;
4043 int reserved_extents;
4039 4044
4040 num_bytes = ALIGN(num_bytes, root->sectorsize); 4045 num_bytes = ALIGN(num_bytes, root->sectorsize);
4041 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4046 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
4042 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); 4047 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
4043 4048
4044 spin_lock(&BTRFS_I(inode)->accounting_lock); 4049 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
4045 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4050 do {
4046 if (nr_extents < BTRFS_I(inode)->reserved_extents) { 4051 int old, new;
4047 nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; 4052
4048 BTRFS_I(inode)->reserved_extents -= nr_extents; 4053 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
4049 } else { 4054 if (nr_extents >= reserved_extents) {
4050 nr_extents = 0; 4055 nr_extents = 0;
4051 } 4056 break;
4052 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4057 }
4058 old = reserved_extents;
4059 nr_extents = reserved_extents - nr_extents;
4060 new = reserved_extents - nr_extents;
4061 old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
4062 reserved_extents, new);
4063 if (likely(old == reserved_extents))
4064 break;
4065 reserved_extents = old;
4066 } while (1);
4053 4067
4054 to_free = calc_csum_metadata_size(inode, num_bytes); 4068 to_free = calc_csum_metadata_size(inode, num_bytes);
4055 if (nr_extents > 0) 4069 if (nr_extents > 0)
@@ -4223,8 +4237,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
4223 * update size of reserved extents. this function may return -EAGAIN 4237 * update size of reserved extents. this function may return -EAGAIN
4224 * if 'reserve' is true or 'sinfo' is false. 4238 * if 'reserve' is true or 'sinfo' is false.
4225 */ 4239 */
4226static int update_reserved_bytes(struct btrfs_block_group_cache *cache, 4240int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4227 u64 num_bytes, int reserve, int sinfo) 4241 u64 num_bytes, int reserve, int sinfo)
4228{ 4242{
4229 int ret = 0; 4243 int ret = 0;
4230 if (sinfo) { 4244 if (sinfo) {
@@ -4363,7 +4377,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4363 if (ret) 4377 if (ret)
4364 break; 4378 break;
4365 4379
4366 ret = btrfs_discard_extent(root, start, end + 1 - start); 4380 if (btrfs_test_opt(root, DISCARD))
4381 ret = btrfs_discard_extent(root, start,
4382 end + 1 - start, NULL);
4367 4383
4368 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4384 clear_extent_dirty(unpin, start, end, GFP_NOFS);
4369 unpin_extent_range(root, start, end); 4385 unpin_extent_range(root, start, end);
@@ -4704,10 +4720,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4704 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4720 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4705 4721
4706 btrfs_add_free_space(cache, buf->start, buf->len); 4722 btrfs_add_free_space(cache, buf->start, buf->len);
4707 ret = update_reserved_bytes(cache, buf->len, 0, 0); 4723 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
4708 if (ret == -EAGAIN) { 4724 if (ret == -EAGAIN) {
4709 /* block group became read-only */ 4725 /* block group became read-only */
4710 update_reserved_bytes(cache, buf->len, 0, 1); 4726 btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
4711 goto out; 4727 goto out;
4712 } 4728 }
4713 4729
@@ -4744,6 +4760,11 @@ pin:
4744 } 4760 }
4745 } 4761 }
4746out: 4762out:
4763 /*
4764 * Deleting the buffer, clear the corrupt flag since it doesn't matter
4765 * anymore.
4766 */
4767 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
4747 btrfs_put_block_group(cache); 4768 btrfs_put_block_group(cache);
4748} 4769}
4749 4770
@@ -5191,7 +5212,7 @@ checks:
5191 search_start - offset); 5212 search_start - offset);
5192 BUG_ON(offset > search_start); 5213 BUG_ON(offset > search_start);
5193 5214
5194 ret = update_reserved_bytes(block_group, num_bytes, 1, 5215 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
5195 (data & BTRFS_BLOCK_GROUP_DATA)); 5216 (data & BTRFS_BLOCK_GROUP_DATA));
5196 if (ret == -EAGAIN) { 5217 if (ret == -EAGAIN) {
5197 btrfs_add_free_space(block_group, offset, num_bytes); 5218 btrfs_add_free_space(block_group, offset, num_bytes);
@@ -5397,6 +5418,8 @@ again:
5397 dump_space_info(sinfo, num_bytes, 1); 5418 dump_space_info(sinfo, num_bytes, 1);
5398 } 5419 }
5399 5420
5421 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
5422
5400 return ret; 5423 return ret;
5401} 5424}
5402 5425
@@ -5412,12 +5435,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5412 return -ENOSPC; 5435 return -ENOSPC;
5413 } 5436 }
5414 5437
5415 ret = btrfs_discard_extent(root, start, len); 5438 if (btrfs_test_opt(root, DISCARD))
5439 ret = btrfs_discard_extent(root, start, len, NULL);
5416 5440
5417 btrfs_add_free_space(cache, start, len); 5441 btrfs_add_free_space(cache, start, len);
5418 update_reserved_bytes(cache, len, 0, 1); 5442 btrfs_update_reserved_bytes(cache, len, 0, 1);
5419 btrfs_put_block_group(cache); 5443 btrfs_put_block_group(cache);
5420 5444
5445 trace_btrfs_reserved_extent_free(root, start, len);
5446
5421 return ret; 5447 return ret;
5422} 5448}
5423 5449
@@ -5444,7 +5470,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5444 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 5470 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
5445 5471
5446 path = btrfs_alloc_path(); 5472 path = btrfs_alloc_path();
5447 BUG_ON(!path); 5473 if (!path)
5474 return -ENOMEM;
5448 5475
5449 path->leave_spinning = 1; 5476 path->leave_spinning = 1;
5450 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5477 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5614,7 +5641,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5614 put_caching_control(caching_ctl); 5641 put_caching_control(caching_ctl);
5615 } 5642 }
5616 5643
5617 ret = update_reserved_bytes(block_group, ins->offset, 1, 1); 5644 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
5618 BUG_ON(ret); 5645 BUG_ON(ret);
5619 btrfs_put_block_group(block_group); 5646 btrfs_put_block_group(block_group);
5620 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5647 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -6047,6 +6074,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6047 if (reada && level == 1) 6074 if (reada && level == 1)
6048 reada_walk_down(trans, root, wc, path); 6075 reada_walk_down(trans, root, wc, path);
6049 next = read_tree_block(root, bytenr, blocksize, generation); 6076 next = read_tree_block(root, bytenr, blocksize, generation);
6077 if (!next)
6078 return -EIO;
6050 btrfs_tree_lock(next); 6079 btrfs_tree_lock(next);
6051 btrfs_set_lock_blocking(next); 6080 btrfs_set_lock_blocking(next);
6052 } 6081 }
@@ -6438,10 +6467,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6438 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6467 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
6439 6468
6440 path = btrfs_alloc_path(); 6469 path = btrfs_alloc_path();
6441 BUG_ON(!path); 6470 if (!path)
6471 return -ENOMEM;
6442 6472
6443 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6473 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6444 BUG_ON(!wc); 6474 if (!wc) {
6475 btrfs_free_path(path);
6476 return -ENOMEM;
6477 }
6445 6478
6446 btrfs_assert_tree_locked(parent); 6479 btrfs_assert_tree_locked(parent);
6447 parent_level = btrfs_header_level(parent); 6480 parent_level = btrfs_header_level(parent);
@@ -6899,7 +6932,11 @@ static noinline int get_new_locations(struct inode *reloc_inode,
6899 } 6932 }
6900 6933
6901 path = btrfs_alloc_path(); 6934 path = btrfs_alloc_path();
6902 BUG_ON(!path); 6935 if (!path) {
6936 if (exts != *extents)
6937 kfree(exts);
6938 return -ENOMEM;
6939 }
6903 6940
6904 cur_pos = extent_key->objectid - offset; 6941 cur_pos = extent_key->objectid - offset;
6905 last_byte = extent_key->objectid + extent_key->offset; 6942 last_byte = extent_key->objectid + extent_key->offset;
@@ -6941,6 +6978,10 @@ static noinline int get_new_locations(struct inode *reloc_inode,
6941 struct disk_extent *old = exts; 6978 struct disk_extent *old = exts;
6942 max *= 2; 6979 max *= 2;
6943 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 6980 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
6981 if (!exts) {
6982 ret = -ENOMEM;
6983 goto out;
6984 }
6944 memcpy(exts, old, sizeof(*exts) * nr); 6985 memcpy(exts, old, sizeof(*exts) * nr);
6945 if (old != *extents) 6986 if (old != *extents)
6946 kfree(old); 6987 kfree(old);
@@ -7423,7 +7464,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
7423 int ret; 7464 int ret;
7424 7465
7425 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 7466 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
7426 BUG_ON(!new_extent); 7467 if (!new_extent)
7468 return -ENOMEM;
7427 7469
7428 ref = btrfs_lookup_leaf_ref(root, leaf->start); 7470 ref = btrfs_lookup_leaf_ref(root, leaf->start);
7429 BUG_ON(!ref); 7471 BUG_ON(!ref);
@@ -7609,7 +7651,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7609 7651
7610 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 7652 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
7611 BUG_ON(!reloc_root); 7653 BUG_ON(!reloc_root);
7612 btrfs_orphan_cleanup(reloc_root); 7654 ret = btrfs_orphan_cleanup(reloc_root);
7655 BUG_ON(ret);
7613 return 0; 7656 return 0;
7614} 7657}
7615 7658
@@ -7627,7 +7670,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7627 return 0; 7670 return 0;
7628 7671
7629 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 7672 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7630 BUG_ON(!root_item); 7673 if (!root_item)
7674 return -ENOMEM;
7631 7675
7632 ret = btrfs_copy_root(trans, root, root->commit_root, 7676 ret = btrfs_copy_root(trans, root, root->commit_root,
7633 &eb, BTRFS_TREE_RELOC_OBJECTID); 7677 &eb, BTRFS_TREE_RELOC_OBJECTID);
@@ -7653,7 +7697,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7653 7697
7654 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 7698 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7655 &root_key); 7699 &root_key);
7656 BUG_ON(!reloc_root); 7700 BUG_ON(IS_ERR(reloc_root));
7657 reloc_root->last_trans = trans->transid; 7701 reloc_root->last_trans = trans->transid;
7658 reloc_root->commit_root = NULL; 7702 reloc_root->commit_root = NULL;
7659 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 7703 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
@@ -7906,6 +7950,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7906 7950
7907 eb = read_tree_block(found_root, block_start, 7951 eb = read_tree_block(found_root, block_start,
7908 block_size, 0); 7952 block_size, 0);
7953 if (!eb) {
7954 ret = -EIO;
7955 goto out;
7956 }
7909 btrfs_tree_lock(eb); 7957 btrfs_tree_lock(eb);
7910 BUG_ON(level != btrfs_header_level(eb)); 7958 BUG_ON(level != btrfs_header_level(eb));
7911 7959
@@ -8621,6 +8669,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8621 BUG_ON(!block_group); 8669 BUG_ON(!block_group);
8622 BUG_ON(!block_group->ro); 8670 BUG_ON(!block_group->ro);
8623 8671
8672 /*
8673 * Free the reserved super bytes from this block group before
8674 * remove it.
8675 */
8676 free_excluded_extents(root, block_group);
8677
8624 memcpy(&key, &block_group->key, sizeof(key)); 8678 memcpy(&key, &block_group->key, sizeof(key));
8625 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 8679 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
8626 BTRFS_BLOCK_GROUP_RAID1 | 8680 BTRFS_BLOCK_GROUP_RAID1 |
@@ -8724,13 +8778,84 @@ out:
8724 return ret; 8778 return ret;
8725} 8779}
8726 8780
8781int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
8782{
8783 struct btrfs_space_info *space_info;
8784 int ret;
8785
8786 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0,
8787 &space_info);
8788 if (ret)
8789 return ret;
8790
8791 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0,
8792 &space_info);
8793 if (ret)
8794 return ret;
8795
8796 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0,
8797 &space_info);
8798 if (ret)
8799 return ret;
8800
8801 return ret;
8802}
8803
8727int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 8804int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
8728{ 8805{
8729 return unpin_extent_range(root, start, end); 8806 return unpin_extent_range(root, start, end);
8730} 8807}
8731 8808
8732int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 8809int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
8733 u64 num_bytes) 8810 u64 num_bytes, u64 *actual_bytes)
8811{
8812 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
8813}
8814
8815int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
8734{ 8816{
8735 return btrfs_discard_extent(root, bytenr, num_bytes); 8817 struct btrfs_fs_info *fs_info = root->fs_info;
8818 struct btrfs_block_group_cache *cache = NULL;
8819 u64 group_trimmed;
8820 u64 start;
8821 u64 end;
8822 u64 trimmed = 0;
8823 int ret = 0;
8824
8825 cache = btrfs_lookup_block_group(fs_info, range->start);
8826
8827 while (cache) {
8828 if (cache->key.objectid >= (range->start + range->len)) {
8829 btrfs_put_block_group(cache);
8830 break;
8831 }
8832
8833 start = max(range->start, cache->key.objectid);
8834 end = min(range->start + range->len,
8835 cache->key.objectid + cache->key.offset);
8836
8837 if (end - start >= range->minlen) {
8838 if (!block_group_cache_done(cache)) {
8839 ret = cache_block_group(cache, NULL, root, 0);
8840 if (!ret)
8841 wait_block_group_cache_done(cache);
8842 }
8843 ret = btrfs_trim_block_group(cache,
8844 &group_trimmed,
8845 start,
8846 end,
8847 range->minlen);
8848
8849 trimmed += group_trimmed;
8850 if (ret) {
8851 btrfs_put_block_group(cache);
8852 break;
8853 }
8854 }
8855
8856 cache = next_block_group(fs_info->tree_root, cache);
8857 }
8858
8859 range->len = trimmed;
8860 return ret;
8736} 8861}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 714adc4ac4c2..20ddb28602a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2188,10 +2188,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2188 unsigned long nr_written = 0; 2188 unsigned long nr_written = 0;
2189 2189
2190 if (wbc->sync_mode == WB_SYNC_ALL) 2190 if (wbc->sync_mode == WB_SYNC_ALL)
2191 write_flags = WRITE_SYNC_PLUG; 2191 write_flags = WRITE_SYNC;
2192 else 2192 else
2193 write_flags = WRITE; 2193 write_flags = WRITE;
2194 2194
2195 trace___extent_writepage(page, inode, wbc);
2196
2195 WARN_ON(!PageLocked(page)); 2197 WARN_ON(!PageLocked(page));
2196 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2198 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2197 if (page->index > end_index || 2199 if (page->index > end_index ||
@@ -3690,6 +3692,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3690 "wanted %lu %lu\n", (unsigned long long)eb->start, 3692 "wanted %lu %lu\n", (unsigned long long)eb->start,
3691 eb->len, start, min_len); 3693 eb->len, start, min_len);
3692 WARN_ON(1); 3694 WARN_ON(1);
3695 return -EINVAL;
3693 } 3696 }
3694 3697
3695 p = extent_buffer_page(eb, i); 3698 p = extent_buffer_page(eb, i);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 9318dfefd59c..f62c5442835d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -31,6 +31,7 @@
31#define EXTENT_BUFFER_UPTODATE 0 31#define EXTENT_BUFFER_UPTODATE 0
32#define EXTENT_BUFFER_BLOCKING 1 32#define EXTENT_BUFFER_BLOCKING 1
33#define EXTENT_BUFFER_DIRTY 2 33#define EXTENT_BUFFER_DIRTY 2
34#define EXTENT_BUFFER_CORRUPT 3
34 35
35/* these are flags for extent_clear_unlock_delalloc */ 36/* these are flags for extent_clear_unlock_delalloc */
36#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 37#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 4f19a3e1bf32..a6a9d4e8b491 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
48 struct extent_buffer *leaf; 48 struct extent_buffer *leaf;
49 49
50 path = btrfs_alloc_path(); 50 path = btrfs_alloc_path();
51 BUG_ON(!path); 51 if (!path)
52 return -ENOMEM;
52 file_key.objectid = objectid; 53 file_key.objectid = objectid;
53 file_key.offset = pos; 54 file_key.offset = pos;
54 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); 55 btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
169 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 170 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
170 171
171 path = btrfs_alloc_path(); 172 path = btrfs_alloc_path();
173 if (!path)
174 return -ENOMEM;
172 if (bio->bi_size > PAGE_CACHE_SIZE * 8) 175 if (bio->bi_size > PAGE_CACHE_SIZE * 8)
173 path->reada = 2; 176 path->reada = 2;
174 177
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f447b783bb84..656bc0a892b1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -45,14 +45,14 @@
45 * and be replaced with calls into generic code. 45 * and be replaced with calls into generic code.
46 */ 46 */
47static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, 47static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48 int write_bytes, 48 size_t write_bytes,
49 struct page **prepared_pages, 49 struct page **prepared_pages,
50 struct iov_iter *i) 50 struct iov_iter *i)
51{ 51{
52 size_t copied = 0; 52 size_t copied = 0;
53 size_t total_copied = 0;
53 int pg = 0; 54 int pg = 0;
54 int offset = pos & (PAGE_CACHE_SIZE - 1); 55 int offset = pos & (PAGE_CACHE_SIZE - 1);
55 int total_copied = 0;
56 56
57 while (write_bytes > 0) { 57 while (write_bytes > 0) {
58 size_t count = min_t(size_t, 58 size_t count = min_t(size_t,
@@ -88,9 +88,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
88 total_copied += copied; 88 total_copied += copied;
89 89
90 /* Return to btrfs_file_aio_write to fault page */ 90 /* Return to btrfs_file_aio_write to fault page */
91 if (unlikely(copied == 0)) { 91 if (unlikely(copied == 0))
92 break; 92 break;
93 }
94 93
95 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { 94 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
96 offset += copied; 95 offset += copied;
@@ -109,8 +108,6 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
109{ 108{
110 size_t i; 109 size_t i;
111 for (i = 0; i < num_pages; i++) { 110 for (i = 0; i < num_pages; i++) {
112 if (!pages[i])
113 break;
114 /* page checked is some magic around finding pages that 111 /* page checked is some magic around finding pages that
115 * have been modified without going through btrfs_set_page_dirty 112 * have been modified without going through btrfs_set_page_dirty
116 * clear it here 113 * clear it here
@@ -130,13 +127,12 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
130 * this also makes the decision about creating an inline extent vs 127 * this also makes the decision about creating an inline extent vs
131 * doing real data extents, marking pages dirty and delalloc as required. 128 * doing real data extents, marking pages dirty and delalloc as required.
132 */ 129 */
133static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, 130static noinline int dirty_and_release_pages(struct btrfs_root *root,
134 struct btrfs_root *root, 131 struct file *file,
135 struct file *file, 132 struct page **pages,
136 struct page **pages, 133 size_t num_pages,
137 size_t num_pages, 134 loff_t pos,
138 loff_t pos, 135 size_t write_bytes)
139 size_t write_bytes)
140{ 136{
141 int err = 0; 137 int err = 0;
142 int i; 138 int i;
@@ -154,7 +150,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
154 end_of_last_block = start_pos + num_bytes - 1; 150 end_of_last_block = start_pos + num_bytes - 1;
155 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 151 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
156 NULL); 152 NULL);
157 BUG_ON(err); 153 if (err)
154 return err;
158 155
159 for (i = 0; i < num_pages; i++) { 156 for (i = 0; i < num_pages; i++) {
160 struct page *p = pages[i]; 157 struct page *p = pages[i];
@@ -162,13 +159,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
162 ClearPageChecked(p); 159 ClearPageChecked(p);
163 set_page_dirty(p); 160 set_page_dirty(p);
164 } 161 }
165 if (end_pos > isize) { 162
163 /*
164 * we've only changed i_size in ram, and we haven't updated
165 * the disk i_size. There is no need to log the inode
166 * at this time.
167 */
168 if (end_pos > isize)
166 i_size_write(inode, end_pos); 169 i_size_write(inode, end_pos);
167 /* we've only changed i_size in ram, and we haven't updated
168 * the disk i_size. There is no need to log the inode
169 * at this time.
170 */
171 }
172 return 0; 170 return 0;
173} 171}
174 172
@@ -610,6 +608,8 @@ again:
610 key.offset = split; 608 key.offset = split;
611 609
612 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 610 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
611 if (ret < 0)
612 goto out;
613 if (ret > 0 && path->slots[0] > 0) 613 if (ret > 0 && path->slots[0] > 0)
614 path->slots[0]--; 614 path->slots[0]--;
615 615
@@ -819,12 +819,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
819 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; 819 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
820 820
821 if (start_pos > inode->i_size) { 821 if (start_pos > inode->i_size) {
822 err = btrfs_cont_expand(inode, start_pos); 822 err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
823 if (err) 823 if (err)
824 return err; 824 return err;
825 } 825 }
826 826
827 memset(pages, 0, num_pages * sizeof(struct page *));
828again: 827again:
829 for (i = 0; i < num_pages; i++) { 828 for (i = 0; i < num_pages; i++) {
830 pages[i] = grab_cache_page(inode->i_mapping, index + i); 829 pages[i] = grab_cache_page(inode->i_mapping, index + i);
@@ -896,156 +895,71 @@ fail:
896 895
897} 896}
898 897
899static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 898static noinline ssize_t __btrfs_buffered_write(struct file *file,
900 const struct iovec *iov, 899 struct iov_iter *i,
901 unsigned long nr_segs, loff_t pos) 900 loff_t pos)
902{ 901{
903 struct file *file = iocb->ki_filp;
904 struct inode *inode = fdentry(file)->d_inode; 902 struct inode *inode = fdentry(file)->d_inode;
905 struct btrfs_root *root = BTRFS_I(inode)->root; 903 struct btrfs_root *root = BTRFS_I(inode)->root;
906 struct page **pages = NULL; 904 struct page **pages = NULL;
907 struct iov_iter i;
908 loff_t *ppos = &iocb->ki_pos;
909 loff_t start_pos;
910 ssize_t num_written = 0;
911 ssize_t err = 0;
912 size_t count;
913 size_t ocount;
914 int ret = 0;
915 int nrptrs;
916 unsigned long first_index; 905 unsigned long first_index;
917 unsigned long last_index; 906 unsigned long last_index;
918 int will_write; 907 size_t num_written = 0;
919 int buffered = 0; 908 int nrptrs;
920 int copied = 0; 909 int ret;
921 int dirty_pages = 0;
922
923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
924 (file->f_flags & O_DIRECT));
925
926 start_pos = pos;
927
928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
929
930 mutex_lock(&inode->i_mutex);
931
932 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
933 if (err)
934 goto out;
935 count = ocount;
936
937 current->backing_dev_info = inode->i_mapping->backing_dev_info;
938 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
939 if (err)
940 goto out;
941
942 if (count == 0)
943 goto out;
944
945 err = file_remove_suid(file);
946 if (err)
947 goto out;
948
949 /*
950 * If BTRFS flips readonly due to some impossible error
951 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
952 * although we have opened a file as writable, we have
953 * to stop this write operation to ensure FS consistency.
954 */
955 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
956 err = -EROFS;
957 goto out;
958 }
959
960 file_update_time(file);
961 BTRFS_I(inode)->sequence++;
962
963 if (unlikely(file->f_flags & O_DIRECT)) {
964 num_written = generic_file_direct_write(iocb, iov, &nr_segs,
965 pos, ppos, count,
966 ocount);
967 /*
968 * the generic O_DIRECT will update in-memory i_size after the
969 * DIOs are done. But our endio handlers that update the on
970 * disk i_size never update past the in memory i_size. So we
971 * need one more update here to catch any additions to the
972 * file
973 */
974 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
975 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
976 mark_inode_dirty(inode);
977 }
978
979 if (num_written < 0) {
980 ret = num_written;
981 num_written = 0;
982 goto out;
983 } else if (num_written == count) {
984 /* pick up pos changes done by the generic code */
985 pos = *ppos;
986 goto out;
987 }
988 /*
989 * We are going to do buffered for the rest of the range, so we
990 * need to make sure to invalidate the buffered pages when we're
991 * done.
992 */
993 buffered = 1;
994 pos += num_written;
995 }
996 910
997 iov_iter_init(&i, iov, nr_segs, count, num_written); 911 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
998 nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
999 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 912 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
1000 (sizeof(struct page *))); 913 (sizeof(struct page *)));
1001 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 914 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1002 if (!pages) { 915 if (!pages)
1003 ret = -ENOMEM; 916 return -ENOMEM;
1004 goto out;
1005 }
1006
1007 /* generic_write_checks can change our pos */
1008 start_pos = pos;
1009 917
1010 first_index = pos >> PAGE_CACHE_SHIFT; 918 first_index = pos >> PAGE_CACHE_SHIFT;
1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 919 last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
1012 920
1013 while (iov_iter_count(&i) > 0) { 921 while (iov_iter_count(i) > 0) {
1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 922 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1015 size_t write_bytes = min(iov_iter_count(&i), 923 size_t write_bytes = min(iov_iter_count(i),
1016 nrptrs * (size_t)PAGE_CACHE_SIZE - 924 nrptrs * (size_t)PAGE_CACHE_SIZE -
1017 offset); 925 offset);
1018 size_t num_pages = (write_bytes + offset + 926 size_t num_pages = (write_bytes + offset +
1019 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 927 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
928 size_t dirty_pages;
929 size_t copied;
1020 930
1021 WARN_ON(num_pages > nrptrs); 931 WARN_ON(num_pages > nrptrs);
1022 memset(pages, 0, sizeof(struct page *) * nrptrs);
1023 932
1024 /* 933 /*
1025 * Fault pages before locking them in prepare_pages 934 * Fault pages before locking them in prepare_pages
1026 * to avoid recursive lock 935 * to avoid recursive lock
1027 */ 936 */
1028 if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { 937 if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
1029 ret = -EFAULT; 938 ret = -EFAULT;
1030 goto out; 939 break;
1031 } 940 }
1032 941
1033 ret = btrfs_delalloc_reserve_space(inode, 942 ret = btrfs_delalloc_reserve_space(inode,
1034 num_pages << PAGE_CACHE_SHIFT); 943 num_pages << PAGE_CACHE_SHIFT);
1035 if (ret) 944 if (ret)
1036 goto out; 945 break;
1037 946
947 /*
948 * This is going to setup the pages array with the number of
949 * pages we want, so we don't really need to worry about the
950 * contents of pages from loop to loop
951 */
1038 ret = prepare_pages(root, file, pages, num_pages, 952 ret = prepare_pages(root, file, pages, num_pages,
1039 pos, first_index, last_index, 953 pos, first_index, last_index,
1040 write_bytes); 954 write_bytes);
1041 if (ret) { 955 if (ret) {
1042 btrfs_delalloc_release_space(inode, 956 btrfs_delalloc_release_space(inode,
1043 num_pages << PAGE_CACHE_SHIFT); 957 num_pages << PAGE_CACHE_SHIFT);
1044 goto out; 958 break;
1045 } 959 }
1046 960
1047 copied = btrfs_copy_from_user(pos, num_pages, 961 copied = btrfs_copy_from_user(pos, num_pages,
1048 write_bytes, pages, &i); 962 write_bytes, pages, i);
1049 963
1050 /* 964 /*
1051 * if we have trouble faulting in the pages, fall 965 * if we have trouble faulting in the pages, fall
@@ -1061,6 +975,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1061 PAGE_CACHE_SIZE - 1) >> 975 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT; 976 PAGE_CACHE_SHIFT;
1063 977
978 /*
979 * If we had a short copy we need to release the excess delaloc
980 * bytes we reserved. We need to increment outstanding_extents
981 * because btrfs_delalloc_release_space will decrement it, but
982 * we still have an outstanding extent for the chunk we actually
983 * managed to copy.
984 */
1064 if (num_pages > dirty_pages) { 985 if (num_pages > dirty_pages) {
1065 if (copied > 0) 986 if (copied > 0)
1066 atomic_inc( 987 atomic_inc(
@@ -1071,39 +992,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1071 } 992 }
1072 993
1073 if (copied > 0) { 994 if (copied > 0) {
1074 dirty_and_release_pages(NULL, root, file, pages, 995 ret = dirty_and_release_pages(root, file, pages,
1075 dirty_pages, pos, copied); 996 dirty_pages, pos,
997 copied);
998 if (ret) {
999 btrfs_delalloc_release_space(inode,
1000 dirty_pages << PAGE_CACHE_SHIFT);
1001 btrfs_drop_pages(pages, num_pages);
1002 break;
1003 }
1076 } 1004 }
1077 1005
1078 btrfs_drop_pages(pages, num_pages); 1006 btrfs_drop_pages(pages, num_pages);
1079 1007
1080 if (copied > 0) { 1008 cond_resched();
1081 if (will_write) { 1009
1082 filemap_fdatawrite_range(inode->i_mapping, pos, 1010 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1083 pos + copied - 1); 1011 dirty_pages);
1084 } else { 1012 if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1085 balance_dirty_pages_ratelimited_nr( 1013 btrfs_btree_balance_dirty(root, 1);
1086 inode->i_mapping, 1014 btrfs_throttle(root);
1087 dirty_pages);
1088 if (dirty_pages <
1089 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1090 btrfs_btree_balance_dirty(root, 1);
1091 btrfs_throttle(root);
1092 }
1093 }
1094 1015
1095 pos += copied; 1016 pos += copied;
1096 num_written += copied; 1017 num_written += copied;
1018 }
1097 1019
1098 cond_resched(); 1020 kfree(pages);
1021
1022 return num_written ? num_written : ret;
1023}
1024
1025static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1026 const struct iovec *iov,
1027 unsigned long nr_segs, loff_t pos,
1028 loff_t *ppos, size_t count, size_t ocount)
1029{
1030 struct file *file = iocb->ki_filp;
1031 struct inode *inode = fdentry(file)->d_inode;
1032 struct iov_iter i;
1033 ssize_t written;
1034 ssize_t written_buffered;
1035 loff_t endbyte;
1036 int err;
1037
1038 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
1039 count, ocount);
1040
1041 /*
1042 * the generic O_DIRECT will update in-memory i_size after the
1043 * DIOs are done. But our endio handlers that update the on
1044 * disk i_size never update past the in memory i_size. So we
1045 * need one more update here to catch any additions to the
1046 * file
1047 */
1048 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
1049 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
1050 mark_inode_dirty(inode);
1099 } 1051 }
1052
1053 if (written < 0 || written == count)
1054 return written;
1055
1056 pos += written;
1057 count -= written;
1058 iov_iter_init(&i, iov, nr_segs, count, written);
1059 written_buffered = __btrfs_buffered_write(file, &i, pos);
1060 if (written_buffered < 0) {
1061 err = written_buffered;
1062 goto out;
1063 }
1064 endbyte = pos + written_buffered - 1;
1065 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
1066 if (err)
1067 goto out;
1068 written += written_buffered;
1069 *ppos = pos + written_buffered;
1070 invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
1071 endbyte >> PAGE_CACHE_SHIFT);
1100out: 1072out:
1101 mutex_unlock(&inode->i_mutex); 1073 return written ? written : err;
1102 if (ret) 1074}
1103 err = ret;
1104 1075
1105 kfree(pages); 1076static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1106 *ppos = pos; 1077 const struct iovec *iov,
1078 unsigned long nr_segs, loff_t pos)
1079{
1080 struct file *file = iocb->ki_filp;
1081 struct inode *inode = fdentry(file)->d_inode;
1082 struct btrfs_root *root = BTRFS_I(inode)->root;
1083 loff_t *ppos = &iocb->ki_pos;
1084 ssize_t num_written = 0;
1085 ssize_t err = 0;
1086 size_t count, ocount;
1087
1088 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1089
1090 mutex_lock(&inode->i_mutex);
1091
1092 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
1093 if (err) {
1094 mutex_unlock(&inode->i_mutex);
1095 goto out;
1096 }
1097 count = ocount;
1098
1099 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1100 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1101 if (err) {
1102 mutex_unlock(&inode->i_mutex);
1103 goto out;
1104 }
1105
1106 if (count == 0) {
1107 mutex_unlock(&inode->i_mutex);
1108 goto out;
1109 }
1110
1111 err = file_remove_suid(file);
1112 if (err) {
1113 mutex_unlock(&inode->i_mutex);
1114 goto out;
1115 }
1116
1117 /*
1118 * If BTRFS flips readonly due to some impossible error
1119 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
1120 * although we have opened a file as writable, we have
1121 * to stop this write operation to ensure FS consistency.
1122 */
1123 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
1124 mutex_unlock(&inode->i_mutex);
1125 err = -EROFS;
1126 goto out;
1127 }
1128
1129 file_update_time(file);
1130 BTRFS_I(inode)->sequence++;
1131
1132 if (unlikely(file->f_flags & O_DIRECT)) {
1133 num_written = __btrfs_direct_write(iocb, iov, nr_segs,
1134 pos, ppos, count, ocount);
1135 } else {
1136 struct iov_iter i;
1137
1138 iov_iter_init(&i, iov, nr_segs, count, num_written);
1139
1140 num_written = __btrfs_buffered_write(file, &i, pos);
1141 if (num_written > 0)
1142 *ppos = pos + num_written;
1143 }
1144
1145 mutex_unlock(&inode->i_mutex);
1107 1146
1108 /* 1147 /*
1109 * we want to make sure fsync finds this change 1148 * we want to make sure fsync finds this change
@@ -1118,43 +1157,12 @@ out:
1118 * one running right now. 1157 * one running right now.
1119 */ 1158 */
1120 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; 1159 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1121 1160 if (num_written > 0 || num_written == -EIOCBQUEUED) {
1122 if (num_written > 0 && will_write) { 1161 err = generic_write_sync(file, pos, num_written);
1123 struct btrfs_trans_handle *trans; 1162 if (err < 0 && num_written > 0)
1124
1125 err = btrfs_wait_ordered_range(inode, start_pos, num_written);
1126 if (err)
1127 num_written = err; 1163 num_written = err;
1128
1129 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1130 trans = btrfs_start_transaction(root, 0);
1131 if (IS_ERR(trans)) {
1132 num_written = PTR_ERR(trans);
1133 goto done;
1134 }
1135 mutex_lock(&inode->i_mutex);
1136 ret = btrfs_log_dentry_safe(trans, root,
1137 file->f_dentry);
1138 mutex_unlock(&inode->i_mutex);
1139 if (ret == 0) {
1140 ret = btrfs_sync_log(trans, root);
1141 if (ret == 0)
1142 btrfs_end_transaction(trans, root);
1143 else
1144 btrfs_commit_transaction(trans, root);
1145 } else if (ret != BTRFS_NO_LOG_SYNC) {
1146 btrfs_commit_transaction(trans, root);
1147 } else {
1148 btrfs_end_transaction(trans, root);
1149 }
1150 }
1151 if (file->f_flags & O_DIRECT && buffered) {
1152 invalidate_mapping_pages(inode->i_mapping,
1153 start_pos >> PAGE_CACHE_SHIFT,
1154 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1155 }
1156 } 1164 }
1157done: 1165out:
1158 current->backing_dev_info = NULL; 1166 current->backing_dev_info = NULL;
1159 return num_written ? num_written : err; 1167 return num_written ? num_written : err;
1160} 1168}
@@ -1197,6 +1205,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1197 int ret = 0; 1205 int ret = 0;
1198 struct btrfs_trans_handle *trans; 1206 struct btrfs_trans_handle *trans;
1199 1207
1208 trace_btrfs_sync_file(file, datasync);
1200 1209
1201 /* we wait first, since the writeback may change the inode */ 1210 /* we wait first, since the writeback may change the inode */
1202 root->log_batch++; 1211 root->log_batch++;
@@ -1324,7 +1333,8 @@ static long btrfs_fallocate(struct file *file, int mode,
1324 goto out; 1333 goto out;
1325 1334
1326 if (alloc_start > inode->i_size) { 1335 if (alloc_start > inode->i_size) {
1327 ret = btrfs_cont_expand(inode, alloc_start); 1336 ret = btrfs_cont_expand(inode, i_size_read(inode),
1337 alloc_start);
1328 if (ret) 1338 if (ret)
1329 goto out; 1339 goto out;
1330 } 1340 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a0390657451b..0037427d8a9d 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -393,7 +393,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
393 break; 393 break;
394 394
395 need_loop = 1; 395 need_loop = 1;
396 e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); 396 e = kmem_cache_zalloc(btrfs_free_space_cachep,
397 GFP_NOFS);
397 if (!e) { 398 if (!e) {
398 kunmap(page); 399 kunmap(page);
399 unlock_page(page); 400 unlock_page(page);
@@ -405,7 +406,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
405 e->bytes = le64_to_cpu(entry->bytes); 406 e->bytes = le64_to_cpu(entry->bytes);
406 if (!e->bytes) { 407 if (!e->bytes) {
407 kunmap(page); 408 kunmap(page);
408 kfree(e); 409 kmem_cache_free(btrfs_free_space_cachep, e);
409 unlock_page(page); 410 unlock_page(page);
410 page_cache_release(page); 411 page_cache_release(page);
411 goto free_cache; 412 goto free_cache;
@@ -420,7 +421,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
420 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 421 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
421 if (!e->bitmap) { 422 if (!e->bitmap) {
422 kunmap(page); 423 kunmap(page);
423 kfree(e); 424 kmem_cache_free(
425 btrfs_free_space_cachep, e);
424 unlock_page(page); 426 unlock_page(page);
425 page_cache_release(page); 427 page_cache_release(page);
426 goto free_cache; 428 goto free_cache;
@@ -1187,7 +1189,7 @@ static void free_bitmap(struct btrfs_block_group_cache *block_group,
1187{ 1189{
1188 unlink_free_space(block_group, bitmap_info); 1190 unlink_free_space(block_group, bitmap_info);
1189 kfree(bitmap_info->bitmap); 1191 kfree(bitmap_info->bitmap);
1190 kfree(bitmap_info); 1192 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1191 block_group->total_bitmaps--; 1193 block_group->total_bitmaps--;
1192 recalculate_thresholds(block_group); 1194 recalculate_thresholds(block_group);
1193} 1195}
@@ -1285,9 +1287,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1285 * If we are below the extents threshold then we can add this as an 1287 * If we are below the extents threshold then we can add this as an
1286 * extent, and don't have to deal with the bitmap 1288 * extent, and don't have to deal with the bitmap
1287 */ 1289 */
1288 if (block_group->free_extents < block_group->extents_thresh && 1290 if (block_group->free_extents < block_group->extents_thresh) {
1289 info->bytes > block_group->sectorsize * 4) 1291 /*
1290 return 0; 1292 * If this block group has some small extents we don't want to
1293 * use up all of our free slots in the cache with them, we want
1294 * to reserve them to larger extents, however if we have plent
1295 * of cache left then go ahead an dadd them, no sense in adding
1296 * the overhead of a bitmap if we don't have to.
1297 */
1298 if (info->bytes <= block_group->sectorsize * 4) {
1299 if (block_group->free_extents * 2 <=
1300 block_group->extents_thresh)
1301 return 0;
1302 } else {
1303 return 0;
1304 }
1305 }
1291 1306
1292 /* 1307 /*
1293 * some block groups are so tiny they can't be enveloped by a bitmap, so 1308 * some block groups are so tiny they can't be enveloped by a bitmap, so
@@ -1342,8 +1357,8 @@ new_bitmap:
1342 1357
1343 /* no pre-allocated info, allocate a new one */ 1358 /* no pre-allocated info, allocate a new one */
1344 if (!info) { 1359 if (!info) {
1345 info = kzalloc(sizeof(struct btrfs_free_space), 1360 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1346 GFP_NOFS); 1361 GFP_NOFS);
1347 if (!info) { 1362 if (!info) {
1348 spin_lock(&block_group->tree_lock); 1363 spin_lock(&block_group->tree_lock);
1349 ret = -ENOMEM; 1364 ret = -ENOMEM;
@@ -1365,7 +1380,7 @@ out:
1365 if (info) { 1380 if (info) {
1366 if (info->bitmap) 1381 if (info->bitmap)
1367 kfree(info->bitmap); 1382 kfree(info->bitmap);
1368 kfree(info); 1383 kmem_cache_free(btrfs_free_space_cachep, info);
1369 } 1384 }
1370 1385
1371 return ret; 1386 return ret;
@@ -1398,7 +1413,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1398 else 1413 else
1399 __unlink_free_space(block_group, right_info); 1414 __unlink_free_space(block_group, right_info);
1400 info->bytes += right_info->bytes; 1415 info->bytes += right_info->bytes;
1401 kfree(right_info); 1416 kmem_cache_free(btrfs_free_space_cachep, right_info);
1402 merged = true; 1417 merged = true;
1403 } 1418 }
1404 1419
@@ -1410,7 +1425,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1410 __unlink_free_space(block_group, left_info); 1425 __unlink_free_space(block_group, left_info);
1411 info->offset = left_info->offset; 1426 info->offset = left_info->offset;
1412 info->bytes += left_info->bytes; 1427 info->bytes += left_info->bytes;
1413 kfree(left_info); 1428 kmem_cache_free(btrfs_free_space_cachep, left_info);
1414 merged = true; 1429 merged = true;
1415 } 1430 }
1416 1431
@@ -1423,7 +1438,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1423 struct btrfs_free_space *info; 1438 struct btrfs_free_space *info;
1424 int ret = 0; 1439 int ret = 0;
1425 1440
1426 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); 1441 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1427 if (!info) 1442 if (!info)
1428 return -ENOMEM; 1443 return -ENOMEM;
1429 1444
@@ -1450,7 +1465,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1450link: 1465link:
1451 ret = link_free_space(block_group, info); 1466 ret = link_free_space(block_group, info);
1452 if (ret) 1467 if (ret)
1453 kfree(info); 1468 kmem_cache_free(btrfs_free_space_cachep, info);
1454out: 1469out:
1455 spin_unlock(&block_group->tree_lock); 1470 spin_unlock(&block_group->tree_lock);
1456 1471
@@ -1520,7 +1535,7 @@ again:
1520 kfree(info->bitmap); 1535 kfree(info->bitmap);
1521 block_group->total_bitmaps--; 1536 block_group->total_bitmaps--;
1522 } 1537 }
1523 kfree(info); 1538 kmem_cache_free(btrfs_free_space_cachep, info);
1524 goto out_lock; 1539 goto out_lock;
1525 } 1540 }
1526 1541
@@ -1556,7 +1571,7 @@ again:
1556 /* the hole we're creating ends at the end 1571 /* the hole we're creating ends at the end
1557 * of the info struct, just free the info 1572 * of the info struct, just free the info
1558 */ 1573 */
1559 kfree(info); 1574 kmem_cache_free(btrfs_free_space_cachep, info);
1560 } 1575 }
1561 spin_unlock(&block_group->tree_lock); 1576 spin_unlock(&block_group->tree_lock);
1562 1577
@@ -1629,30 +1644,28 @@ __btrfs_return_cluster_to_free_space(
1629{ 1644{
1630 struct btrfs_free_space *entry; 1645 struct btrfs_free_space *entry;
1631 struct rb_node *node; 1646 struct rb_node *node;
1632 bool bitmap;
1633 1647
1634 spin_lock(&cluster->lock); 1648 spin_lock(&cluster->lock);
1635 if (cluster->block_group != block_group) 1649 if (cluster->block_group != block_group)
1636 goto out; 1650 goto out;
1637 1651
1638 bitmap = cluster->points_to_bitmap;
1639 cluster->block_group = NULL; 1652 cluster->block_group = NULL;
1640 cluster->window_start = 0; 1653 cluster->window_start = 0;
1641 list_del_init(&cluster->block_group_list); 1654 list_del_init(&cluster->block_group_list);
1642 cluster->points_to_bitmap = false;
1643
1644 if (bitmap)
1645 goto out;
1646 1655
1647 node = rb_first(&cluster->root); 1656 node = rb_first(&cluster->root);
1648 while (node) { 1657 while (node) {
1658 bool bitmap;
1659
1649 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1660 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1650 node = rb_next(&entry->offset_index); 1661 node = rb_next(&entry->offset_index);
1651 rb_erase(&entry->offset_index, &cluster->root); 1662 rb_erase(&entry->offset_index, &cluster->root);
1652 BUG_ON(entry->bitmap); 1663
1653 try_merge_free_space(block_group, entry, false); 1664 bitmap = (entry->bitmap != NULL);
1665 if (!bitmap)
1666 try_merge_free_space(block_group, entry, false);
1654 tree_insert_offset(&block_group->free_space_offset, 1667 tree_insert_offset(&block_group->free_space_offset,
1655 entry->offset, &entry->offset_index, 0); 1668 entry->offset, &entry->offset_index, bitmap);
1656 } 1669 }
1657 cluster->root = RB_ROOT; 1670 cluster->root = RB_ROOT;
1658 1671
@@ -1689,7 +1702,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1689 unlink_free_space(block_group, info); 1702 unlink_free_space(block_group, info);
1690 if (info->bitmap) 1703 if (info->bitmap)
1691 kfree(info->bitmap); 1704 kfree(info->bitmap);
1692 kfree(info); 1705 kmem_cache_free(btrfs_free_space_cachep, info);
1693 if (need_resched()) { 1706 if (need_resched()) {
1694 spin_unlock(&block_group->tree_lock); 1707 spin_unlock(&block_group->tree_lock);
1695 cond_resched(); 1708 cond_resched();
@@ -1722,7 +1735,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1722 entry->offset += bytes; 1735 entry->offset += bytes;
1723 entry->bytes -= bytes; 1736 entry->bytes -= bytes;
1724 if (!entry->bytes) 1737 if (!entry->bytes)
1725 kfree(entry); 1738 kmem_cache_free(btrfs_free_space_cachep, entry);
1726 else 1739 else
1727 link_free_space(block_group, entry); 1740 link_free_space(block_group, entry);
1728 } 1741 }
@@ -1775,50 +1788,24 @@ int btrfs_return_cluster_to_free_space(
1775 1788
1776static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 1789static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1777 struct btrfs_free_cluster *cluster, 1790 struct btrfs_free_cluster *cluster,
1791 struct btrfs_free_space *entry,
1778 u64 bytes, u64 min_start) 1792 u64 bytes, u64 min_start)
1779{ 1793{
1780 struct btrfs_free_space *entry;
1781 int err; 1794 int err;
1782 u64 search_start = cluster->window_start; 1795 u64 search_start = cluster->window_start;
1783 u64 search_bytes = bytes; 1796 u64 search_bytes = bytes;
1784 u64 ret = 0; 1797 u64 ret = 0;
1785 1798
1786 spin_lock(&block_group->tree_lock);
1787 spin_lock(&cluster->lock);
1788
1789 if (!cluster->points_to_bitmap)
1790 goto out;
1791
1792 if (cluster->block_group != block_group)
1793 goto out;
1794
1795 /*
1796 * search_start is the beginning of the bitmap, but at some point it may
1797 * be a good idea to point to the actual start of the free area in the
1798 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
1799 * to 1 to make sure we get the bitmap entry
1800 */
1801 entry = tree_search_offset(block_group,
1802 offset_to_bitmap(block_group, search_start),
1803 1, 0);
1804 if (!entry || !entry->bitmap)
1805 goto out;
1806
1807 search_start = min_start; 1799 search_start = min_start;
1808 search_bytes = bytes; 1800 search_bytes = bytes;
1809 1801
1810 err = search_bitmap(block_group, entry, &search_start, 1802 err = search_bitmap(block_group, entry, &search_start,
1811 &search_bytes); 1803 &search_bytes);
1812 if (err) 1804 if (err)
1813 goto out; 1805 return 0;
1814 1806
1815 ret = search_start; 1807 ret = search_start;
1816 bitmap_clear_bits(block_group, entry, ret, bytes); 1808 bitmap_clear_bits(block_group, entry, ret, bytes);
1817 if (entry->bytes == 0)
1818 free_bitmap(block_group, entry);
1819out:
1820 spin_unlock(&cluster->lock);
1821 spin_unlock(&block_group->tree_lock);
1822 1809
1823 return ret; 1810 return ret;
1824} 1811}
@@ -1836,10 +1823,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1836 struct rb_node *node; 1823 struct rb_node *node;
1837 u64 ret = 0; 1824 u64 ret = 0;
1838 1825
1839 if (cluster->points_to_bitmap)
1840 return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
1841 min_start);
1842
1843 spin_lock(&cluster->lock); 1826 spin_lock(&cluster->lock);
1844 if (bytes > cluster->max_size) 1827 if (bytes > cluster->max_size)
1845 goto out; 1828 goto out;
@@ -1852,9 +1835,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1852 goto out; 1835 goto out;
1853 1836
1854 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1837 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1855
1856 while(1) { 1838 while(1) {
1857 if (entry->bytes < bytes || entry->offset < min_start) { 1839 if (entry->bytes < bytes ||
1840 (!entry->bitmap && entry->offset < min_start)) {
1858 struct rb_node *node; 1841 struct rb_node *node;
1859 1842
1860 node = rb_next(&entry->offset_index); 1843 node = rb_next(&entry->offset_index);
@@ -1864,10 +1847,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1864 offset_index); 1847 offset_index);
1865 continue; 1848 continue;
1866 } 1849 }
1867 ret = entry->offset;
1868 1850
1869 entry->offset += bytes; 1851 if (entry->bitmap) {
1870 entry->bytes -= bytes; 1852 ret = btrfs_alloc_from_bitmap(block_group,
1853 cluster, entry, bytes,
1854 min_start);
1855 if (ret == 0) {
1856 struct rb_node *node;
1857 node = rb_next(&entry->offset_index);
1858 if (!node)
1859 break;
1860 entry = rb_entry(node, struct btrfs_free_space,
1861 offset_index);
1862 continue;
1863 }
1864 } else {
1865
1866 ret = entry->offset;
1867
1868 entry->offset += bytes;
1869 entry->bytes -= bytes;
1870 }
1871 1871
1872 if (entry->bytes == 0) 1872 if (entry->bytes == 0)
1873 rb_erase(&entry->offset_index, &cluster->root); 1873 rb_erase(&entry->offset_index, &cluster->root);
@@ -1884,7 +1884,12 @@ out:
1884 block_group->free_space -= bytes; 1884 block_group->free_space -= bytes;
1885 if (entry->bytes == 0) { 1885 if (entry->bytes == 0) {
1886 block_group->free_extents--; 1886 block_group->free_extents--;
1887 kfree(entry); 1887 if (entry->bitmap) {
1888 kfree(entry->bitmap);
1889 block_group->total_bitmaps--;
1890 recalculate_thresholds(block_group);
1891 }
1892 kmem_cache_free(btrfs_free_space_cachep, entry);
1888 } 1893 }
1889 1894
1890 spin_unlock(&block_group->tree_lock); 1895 spin_unlock(&block_group->tree_lock);
@@ -1904,12 +1909,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1904 unsigned long found_bits; 1909 unsigned long found_bits;
1905 unsigned long start = 0; 1910 unsigned long start = 0;
1906 unsigned long total_found = 0; 1911 unsigned long total_found = 0;
1912 int ret;
1907 bool found = false; 1913 bool found = false;
1908 1914
1909 i = offset_to_bit(entry->offset, block_group->sectorsize, 1915 i = offset_to_bit(entry->offset, block_group->sectorsize,
1910 max_t(u64, offset, entry->offset)); 1916 max_t(u64, offset, entry->offset));
1911 search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); 1917 search_bits = bytes_to_bits(bytes, block_group->sectorsize);
1912 total_bits = bytes_to_bits(bytes, block_group->sectorsize); 1918 total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
1913 1919
1914again: 1920again:
1915 found_bits = 0; 1921 found_bits = 0;
@@ -1926,7 +1932,7 @@ again:
1926 } 1932 }
1927 1933
1928 if (!found_bits) 1934 if (!found_bits)
1929 return -1; 1935 return -ENOSPC;
1930 1936
1931 if (!found) { 1937 if (!found) {
1932 start = i; 1938 start = i;
@@ -1950,189 +1956,208 @@ again:
1950 1956
1951 cluster->window_start = start * block_group->sectorsize + 1957 cluster->window_start = start * block_group->sectorsize +
1952 entry->offset; 1958 entry->offset;
1953 cluster->points_to_bitmap = true; 1959 rb_erase(&entry->offset_index, &block_group->free_space_offset);
1960 ret = tree_insert_offset(&cluster->root, entry->offset,
1961 &entry->offset_index, 1);
1962 BUG_ON(ret);
1954 1963
1955 return 0; 1964 return 0;
1956} 1965}
1957 1966
1958/* 1967/*
1959 * here we try to find a cluster of blocks in a block group. The goal 1968 * This searches the block group for just extents to fill the cluster with.
1960 * is to find at least bytes free and up to empty_size + bytes free.
1961 * We might not find them all in one contiguous area.
1962 *
1963 * returns zero and sets up cluster if things worked out, otherwise
1964 * it returns -enospc
1965 */ 1969 */
1966int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 1970static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
1967 struct btrfs_root *root, 1971 struct btrfs_free_cluster *cluster,
1968 struct btrfs_block_group_cache *block_group, 1972 u64 offset, u64 bytes, u64 min_bytes)
1969 struct btrfs_free_cluster *cluster,
1970 u64 offset, u64 bytes, u64 empty_size)
1971{ 1973{
1974 struct btrfs_free_space *first = NULL;
1972 struct btrfs_free_space *entry = NULL; 1975 struct btrfs_free_space *entry = NULL;
1976 struct btrfs_free_space *prev = NULL;
1977 struct btrfs_free_space *last;
1973 struct rb_node *node; 1978 struct rb_node *node;
1974 struct btrfs_free_space *next;
1975 struct btrfs_free_space *last = NULL;
1976 u64 min_bytes;
1977 u64 window_start; 1979 u64 window_start;
1978 u64 window_free; 1980 u64 window_free;
1979 u64 max_extent = 0; 1981 u64 max_extent;
1980 bool found_bitmap = false; 1982 u64 max_gap = 128 * 1024;
1981 int ret;
1982 1983
1983 /* for metadata, allow allocates with more holes */ 1984 entry = tree_search_offset(block_group, offset, 0, 1);
1984 if (btrfs_test_opt(root, SSD_SPREAD)) { 1985 if (!entry)
1985 min_bytes = bytes + empty_size; 1986 return -ENOSPC;
1986 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
1987 /*
1988 * we want to do larger allocations when we are
1989 * flushing out the delayed refs, it helps prevent
1990 * making more work as we go along.
1991 */
1992 if (trans->transaction->delayed_refs.flushing)
1993 min_bytes = max(bytes, (bytes + empty_size) >> 1);
1994 else
1995 min_bytes = max(bytes, (bytes + empty_size) >> 4);
1996 } else
1997 min_bytes = max(bytes, (bytes + empty_size) >> 2);
1998
1999 spin_lock(&block_group->tree_lock);
2000 spin_lock(&cluster->lock);
2001
2002 /* someone already found a cluster, hooray */
2003 if (cluster->block_group) {
2004 ret = 0;
2005 goto out;
2006 }
2007again:
2008 entry = tree_search_offset(block_group, offset, found_bitmap, 1);
2009 if (!entry) {
2010 ret = -ENOSPC;
2011 goto out;
2012 }
2013 1987
2014 /* 1988 /*
2015 * If found_bitmap is true, we exhausted our search for extent entries, 1989 * We don't want bitmaps, so just move along until we find a normal
2016 * and we just want to search all of the bitmaps that we can find, and 1990 * extent entry.
2017 * ignore any extent entries we find.
2018 */ 1991 */
2019 while (entry->bitmap || found_bitmap || 1992 while (entry->bitmap) {
2020 (!entry->bitmap && entry->bytes < min_bytes)) { 1993 node = rb_next(&entry->offset_index);
2021 struct rb_node *node = rb_next(&entry->offset_index); 1994 if (!node)
2022 1995 return -ENOSPC;
2023 if (entry->bitmap && entry->bytes > bytes + empty_size) {
2024 ret = btrfs_bitmap_cluster(block_group, entry, cluster,
2025 offset, bytes + empty_size,
2026 min_bytes);
2027 if (!ret)
2028 goto got_it;
2029 }
2030
2031 if (!node) {
2032 ret = -ENOSPC;
2033 goto out;
2034 }
2035 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1996 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2036 } 1997 }
2037 1998
2038 /*
2039 * We already searched all the extent entries from the passed in offset
2040 * to the end and didn't find enough space for the cluster, and we also
2041 * didn't find any bitmaps that met our criteria, just go ahead and exit
2042 */
2043 if (found_bitmap) {
2044 ret = -ENOSPC;
2045 goto out;
2046 }
2047
2048 cluster->points_to_bitmap = false;
2049 window_start = entry->offset; 1999 window_start = entry->offset;
2050 window_free = entry->bytes; 2000 window_free = entry->bytes;
2051 last = entry;
2052 max_extent = entry->bytes; 2001 max_extent = entry->bytes;
2002 first = entry;
2003 last = entry;
2004 prev = entry;
2053 2005
2054 while (1) { 2006 while (window_free <= min_bytes) {
2055 /* out window is just right, lets fill it */ 2007 node = rb_next(&entry->offset_index);
2056 if (window_free >= bytes + empty_size) 2008 if (!node)
2057 break; 2009 return -ENOSPC;
2058 2010 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2059 node = rb_next(&last->offset_index);
2060 if (!node) {
2061 if (found_bitmap)
2062 goto again;
2063 ret = -ENOSPC;
2064 goto out;
2065 }
2066 next = rb_entry(node, struct btrfs_free_space, offset_index);
2067 2011
2068 /* 2012 if (entry->bitmap)
2069 * we found a bitmap, so if this search doesn't result in a
2070 * cluster, we know to go and search again for the bitmaps and
2071 * start looking for space there
2072 */
2073 if (next->bitmap) {
2074 if (!found_bitmap)
2075 offset = next->offset;
2076 found_bitmap = true;
2077 last = next;
2078 continue; 2013 continue;
2079 }
2080
2081 /* 2014 /*
2082 * we haven't filled the empty size and the window is 2015 * we haven't filled the empty size and the window is
2083 * very large. reset and try again 2016 * very large. reset and try again
2084 */ 2017 */
2085 if (next->offset - (last->offset + last->bytes) > 128 * 1024 || 2018 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2086 next->offset - window_start > (bytes + empty_size) * 2) { 2019 entry->offset - window_start > (min_bytes * 2)) {
2087 entry = next; 2020 first = entry;
2088 window_start = entry->offset; 2021 window_start = entry->offset;
2089 window_free = entry->bytes; 2022 window_free = entry->bytes;
2090 last = entry; 2023 last = entry;
2091 max_extent = entry->bytes; 2024 max_extent = entry->bytes;
2092 } else { 2025 } else {
2093 last = next; 2026 last = entry;
2094 window_free += next->bytes; 2027 window_free += entry->bytes;
2095 if (entry->bytes > max_extent) 2028 if (entry->bytes > max_extent)
2096 max_extent = entry->bytes; 2029 max_extent = entry->bytes;
2097 } 2030 }
2031 prev = entry;
2098 } 2032 }
2099 2033
2100 cluster->window_start = entry->offset; 2034 cluster->window_start = first->offset;
2035
2036 node = &first->offset_index;
2101 2037
2102 /* 2038 /*
2103 * now we've found our entries, pull them out of the free space 2039 * now we've found our entries, pull them out of the free space
2104 * cache and put them into the cluster rbtree 2040 * cache and put them into the cluster rbtree
2105 *
2106 * The cluster includes an rbtree, but only uses the offset index
2107 * of each free space cache entry.
2108 */ 2041 */
2109 while (1) { 2042 do {
2043 int ret;
2044
2045 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2110 node = rb_next(&entry->offset_index); 2046 node = rb_next(&entry->offset_index);
2111 if (entry->bitmap && node) { 2047 if (entry->bitmap)
2112 entry = rb_entry(node, struct btrfs_free_space,
2113 offset_index);
2114 continue; 2048 continue;
2115 } else if (entry->bitmap && !node) {
2116 break;
2117 }
2118 2049
2119 rb_erase(&entry->offset_index, &block_group->free_space_offset); 2050 rb_erase(&entry->offset_index, &block_group->free_space_offset);
2120 ret = tree_insert_offset(&cluster->root, entry->offset, 2051 ret = tree_insert_offset(&cluster->root, entry->offset,
2121 &entry->offset_index, 0); 2052 &entry->offset_index, 0);
2122 BUG_ON(ret); 2053 BUG_ON(ret);
2054 } while (node && entry != last);
2123 2055
2124 if (!node || entry == last) 2056 cluster->max_size = max_extent;
2125 break; 2057
2058 return 0;
2059}
2060
2061/*
2062 * This specifically looks for bitmaps that may work in the cluster, we assume
2063 * that we have already failed to find extents that will work.
2064 */
2065static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2066 struct btrfs_free_cluster *cluster,
2067 u64 offset, u64 bytes, u64 min_bytes)
2068{
2069 struct btrfs_free_space *entry;
2070 struct rb_node *node;
2071 int ret = -ENOSPC;
2072
2073 if (block_group->total_bitmaps == 0)
2074 return -ENOSPC;
2126 2075
2076 entry = tree_search_offset(block_group,
2077 offset_to_bitmap(block_group, offset),
2078 0, 1);
2079 if (!entry)
2080 return -ENOSPC;
2081
2082 node = &entry->offset_index;
2083 do {
2127 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2084 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2085 node = rb_next(&entry->offset_index);
2086 if (!entry->bitmap)
2087 continue;
2088 if (entry->bytes < min_bytes)
2089 continue;
2090 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2091 bytes, min_bytes);
2092 } while (ret && node);
2093
2094 return ret;
2095}
2096
2097/*
2098 * here we try to find a cluster of blocks in a block group. The goal
2099 * is to find at least bytes free and up to empty_size + bytes free.
2100 * We might not find them all in one contiguous area.
2101 *
2102 * returns zero and sets up cluster if things worked out, otherwise
2103 * it returns -enospc
2104 */
2105int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2106 struct btrfs_root *root,
2107 struct btrfs_block_group_cache *block_group,
2108 struct btrfs_free_cluster *cluster,
2109 u64 offset, u64 bytes, u64 empty_size)
2110{
2111 u64 min_bytes;
2112 int ret;
2113
2114 /* for metadata, allow allocates with more holes */
2115 if (btrfs_test_opt(root, SSD_SPREAD)) {
2116 min_bytes = bytes + empty_size;
2117 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2118 /*
2119 * we want to do larger allocations when we are
2120 * flushing out the delayed refs, it helps prevent
2121 * making more work as we go along.
2122 */
2123 if (trans->transaction->delayed_refs.flushing)
2124 min_bytes = max(bytes, (bytes + empty_size) >> 1);
2125 else
2126 min_bytes = max(bytes, (bytes + empty_size) >> 4);
2127 } else
2128 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2129
2130 spin_lock(&block_group->tree_lock);
2131
2132 /*
2133 * If we know we don't have enough space to make a cluster don't even
2134 * bother doing all the work to try and find one.
2135 */
2136 if (block_group->free_space < min_bytes) {
2137 spin_unlock(&block_group->tree_lock);
2138 return -ENOSPC;
2128 } 2139 }
2129 2140
2130 cluster->max_size = max_extent; 2141 spin_lock(&cluster->lock);
2131got_it: 2142
2132 ret = 0; 2143 /* someone already found a cluster, hooray */
2133 atomic_inc(&block_group->count); 2144 if (cluster->block_group) {
2134 list_add_tail(&cluster->block_group_list, &block_group->cluster_list); 2145 ret = 0;
2135 cluster->block_group = block_group; 2146 goto out;
2147 }
2148
2149 ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
2150 min_bytes);
2151 if (ret)
2152 ret = setup_cluster_bitmap(block_group, cluster, offset,
2153 bytes, min_bytes);
2154
2155 if (!ret) {
2156 atomic_inc(&block_group->count);
2157 list_add_tail(&cluster->block_group_list,
2158 &block_group->cluster_list);
2159 cluster->block_group = block_group;
2160 }
2136out: 2161out:
2137 spin_unlock(&cluster->lock); 2162 spin_unlock(&cluster->lock);
2138 spin_unlock(&block_group->tree_lock); 2163 spin_unlock(&block_group->tree_lock);
@@ -2149,8 +2174,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2149 spin_lock_init(&cluster->refill_lock); 2174 spin_lock_init(&cluster->refill_lock);
2150 cluster->root = RB_ROOT; 2175 cluster->root = RB_ROOT;
2151 cluster->max_size = 0; 2176 cluster->max_size = 0;
2152 cluster->points_to_bitmap = false;
2153 INIT_LIST_HEAD(&cluster->block_group_list); 2177 INIT_LIST_HEAD(&cluster->block_group_list);
2154 cluster->block_group = NULL; 2178 cluster->block_group = NULL;
2155} 2179}
2156 2180
2181int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2182 u64 *trimmed, u64 start, u64 end, u64 minlen)
2183{
2184 struct btrfs_free_space *entry = NULL;
2185 struct btrfs_fs_info *fs_info = block_group->fs_info;
2186 u64 bytes = 0;
2187 u64 actually_trimmed;
2188 int ret = 0;
2189
2190 *trimmed = 0;
2191
2192 while (start < end) {
2193 spin_lock(&block_group->tree_lock);
2194
2195 if (block_group->free_space < minlen) {
2196 spin_unlock(&block_group->tree_lock);
2197 break;
2198 }
2199
2200 entry = tree_search_offset(block_group, start, 0, 1);
2201 if (!entry)
2202 entry = tree_search_offset(block_group,
2203 offset_to_bitmap(block_group,
2204 start),
2205 1, 1);
2206
2207 if (!entry || entry->offset >= end) {
2208 spin_unlock(&block_group->tree_lock);
2209 break;
2210 }
2211
2212 if (entry->bitmap) {
2213 ret = search_bitmap(block_group, entry, &start, &bytes);
2214 if (!ret) {
2215 if (start >= end) {
2216 spin_unlock(&block_group->tree_lock);
2217 break;
2218 }
2219 bytes = min(bytes, end - start);
2220 bitmap_clear_bits(block_group, entry,
2221 start, bytes);
2222 if (entry->bytes == 0)
2223 free_bitmap(block_group, entry);
2224 } else {
2225 start = entry->offset + BITS_PER_BITMAP *
2226 block_group->sectorsize;
2227 spin_unlock(&block_group->tree_lock);
2228 ret = 0;
2229 continue;
2230 }
2231 } else {
2232 start = entry->offset;
2233 bytes = min(entry->bytes, end - start);
2234 unlink_free_space(block_group, entry);
2235 kfree(entry);
2236 }
2237
2238 spin_unlock(&block_group->tree_lock);
2239
2240 if (bytes >= minlen) {
2241 int update_ret;
2242 update_ret = btrfs_update_reserved_bytes(block_group,
2243 bytes, 1, 1);
2244
2245 ret = btrfs_error_discard_extent(fs_info->extent_root,
2246 start,
2247 bytes,
2248 &actually_trimmed);
2249
2250 btrfs_add_free_space(block_group,
2251 start, bytes);
2252 if (!update_ret)
2253 btrfs_update_reserved_bytes(block_group,
2254 bytes, 0, 1);
2255
2256 if (ret)
2257 break;
2258 *trimmed += actually_trimmed;
2259 }
2260 start += bytes;
2261 bytes = 0;
2262
2263 if (fatal_signal_pending(current)) {
2264 ret = -ERESTARTSYS;
2265 break;
2266 }
2267
2268 cond_resched();
2269 }
2270
2271 return ret;
2272}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index e49ca5c321b5..65c3b935289f 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
68int btrfs_return_cluster_to_free_space( 68int btrfs_return_cluster_to_free_space(
69 struct btrfs_block_group_cache *block_group, 69 struct btrfs_block_group_cache *block_group,
70 struct btrfs_free_cluster *cluster); 70 struct btrfs_free_cluster *cluster);
71int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
72 u64 *trimmed, u64 start, u64 end, u64 minlen);
71#endif 73#endif
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index c56eb5909172..c05a08f4c411 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
30 int slot; 30 int slot;
31 31
32 path = btrfs_alloc_path(); 32 path = btrfs_alloc_path();
33 BUG_ON(!path); 33 if (!path)
34 return -ENOMEM;
34 35
35 search_key.objectid = BTRFS_LAST_FREE_OBJECTID; 36 search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
36 search_key.type = -1; 37 search_key.type = -1;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 512c3d1da083..93c28a1d6bdc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -50,6 +50,7 @@
50#include "tree-log.h" 50#include "tree-log.h"
51#include "compression.h" 51#include "compression.h"
52#include "locking.h" 52#include "locking.h"
53#include "free-space-cache.h"
53 54
54struct btrfs_iget_args { 55struct btrfs_iget_args {
55 u64 ino; 56 u64 ino;
@@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep;
70struct kmem_cache *btrfs_trans_handle_cachep; 71struct kmem_cache *btrfs_trans_handle_cachep;
71struct kmem_cache *btrfs_transaction_cachep; 72struct kmem_cache *btrfs_transaction_cachep;
72struct kmem_cache *btrfs_path_cachep; 73struct kmem_cache *btrfs_path_cachep;
74struct kmem_cache *btrfs_free_space_cachep;
73 75
74#define S_SHIFT 12 76#define S_SHIFT 12
75static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { 77static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -82,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
82 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, 84 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
83}; 85};
84 86
85static void btrfs_truncate(struct inode *inode); 87static int btrfs_setsize(struct inode *inode, loff_t newsize);
88static int btrfs_truncate(struct inode *inode);
86static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); 89static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
87static noinline int cow_file_range(struct inode *inode, 90static noinline int cow_file_range(struct inode *inode,
88 struct page *locked_page, 91 struct page *locked_page,
@@ -288,6 +291,7 @@ static noinline int add_async_extent(struct async_cow *cow,
288 struct async_extent *async_extent; 291 struct async_extent *async_extent;
289 292
290 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); 293 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
294 BUG_ON(!async_extent);
291 async_extent->start = start; 295 async_extent->start = start;
292 async_extent->ram_size = ram_size; 296 async_extent->ram_size = ram_size;
293 async_extent->compressed_size = compressed_size; 297 async_extent->compressed_size = compressed_size;
@@ -382,9 +386,11 @@ again:
382 */ 386 */
383 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && 387 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
384 (btrfs_test_opt(root, COMPRESS) || 388 (btrfs_test_opt(root, COMPRESS) ||
385 (BTRFS_I(inode)->force_compress))) { 389 (BTRFS_I(inode)->force_compress) ||
390 (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
386 WARN_ON(pages); 391 WARN_ON(pages);
387 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 392 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
393 BUG_ON(!pages);
388 394
389 if (BTRFS_I(inode)->force_compress) 395 if (BTRFS_I(inode)->force_compress)
390 compress_type = BTRFS_I(inode)->force_compress; 396 compress_type = BTRFS_I(inode)->force_compress;
@@ -1254,7 +1260,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1254 ret = run_delalloc_nocow(inode, locked_page, start, end, 1260 ret = run_delalloc_nocow(inode, locked_page, start, end,
1255 page_started, 0, nr_written); 1261 page_started, 0, nr_written);
1256 else if (!btrfs_test_opt(root, COMPRESS) && 1262 else if (!btrfs_test_opt(root, COMPRESS) &&
1257 !(BTRFS_I(inode)->force_compress)) 1263 !(BTRFS_I(inode)->force_compress) &&
1264 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
1258 ret = cow_file_range(inode, locked_page, start, end, 1265 ret = cow_file_range(inode, locked_page, start, end,
1259 page_started, nr_written, 1); 1266 page_started, nr_written, 1);
1260 else 1267 else
@@ -1461,8 +1468,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1461 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1468 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1462 return btrfs_submit_compressed_read(inode, bio, 1469 return btrfs_submit_compressed_read(inode, bio,
1463 mirror_num, bio_flags); 1470 mirror_num, bio_flags);
1464 } else if (!skip_sum) 1471 } else if (!skip_sum) {
1465 btrfs_lookup_bio_sums(root, inode, bio, NULL); 1472 ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
1473 if (ret)
1474 return ret;
1475 }
1466 goto mapit; 1476 goto mapit;
1467 } else if (!skip_sum) { 1477 } else if (!skip_sum) {
1468 /* csum items have already been cloned */ 1478 /* csum items have already been cloned */
@@ -1785,6 +1795,8 @@ out:
1785static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 1795static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
1786 struct extent_state *state, int uptodate) 1796 struct extent_state *state, int uptodate)
1787{ 1797{
1798 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
1799
1788 ClearPagePrivate2(page); 1800 ClearPagePrivate2(page);
1789 return btrfs_finish_ordered_io(page->mapping->host, start, end); 1801 return btrfs_finish_ordered_io(page->mapping->host, start, end);
1790} 1802}
@@ -1895,10 +1907,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1895 else 1907 else
1896 rw = READ; 1908 rw = READ;
1897 1909
1898 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, 1910 ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
1899 failrec->last_mirror, 1911 failrec->last_mirror,
1900 failrec->bio_flags, 0); 1912 failrec->bio_flags, 0);
1901 return 0; 1913 return ret;
1902} 1914}
1903 1915
1904/* 1916/*
@@ -2282,7 +2294,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2282 * this cleans up any orphans that may be left on the list from the last use 2294 * this cleans up any orphans that may be left on the list from the last use
2283 * of this root. 2295 * of this root.
2284 */ 2296 */
2285void btrfs_orphan_cleanup(struct btrfs_root *root) 2297int btrfs_orphan_cleanup(struct btrfs_root *root)
2286{ 2298{
2287 struct btrfs_path *path; 2299 struct btrfs_path *path;
2288 struct extent_buffer *leaf; 2300 struct extent_buffer *leaf;
@@ -2292,10 +2304,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2292 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2304 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2293 2305
2294 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) 2306 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
2295 return; 2307 return 0;
2296 2308
2297 path = btrfs_alloc_path(); 2309 path = btrfs_alloc_path();
2298 BUG_ON(!path); 2310 if (!path) {
2311 ret = -ENOMEM;
2312 goto out;
2313 }
2299 path->reada = -1; 2314 path->reada = -1;
2300 2315
2301 key.objectid = BTRFS_ORPHAN_OBJECTID; 2316 key.objectid = BTRFS_ORPHAN_OBJECTID;
@@ -2304,11 +2319,8 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2304 2319
2305 while (1) { 2320 while (1) {
2306 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2321 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2307 if (ret < 0) { 2322 if (ret < 0)
2308 printk(KERN_ERR "Error searching slot for orphan: %d" 2323 goto out;
2309 "\n", ret);
2310 break;
2311 }
2312 2324
2313 /* 2325 /*
2314 * if ret == 0 means we found what we were searching for, which 2326 * if ret == 0 means we found what we were searching for, which
@@ -2316,6 +2328,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2316 * find the key and see if we have stuff that matches 2328 * find the key and see if we have stuff that matches
2317 */ 2329 */
2318 if (ret > 0) { 2330 if (ret > 0) {
2331 ret = 0;
2319 if (path->slots[0] == 0) 2332 if (path->slots[0] == 0)
2320 break; 2333 break;
2321 path->slots[0]--; 2334 path->slots[0]--;
@@ -2343,7 +2356,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2343 found_key.type = BTRFS_INODE_ITEM_KEY; 2356 found_key.type = BTRFS_INODE_ITEM_KEY;
2344 found_key.offset = 0; 2357 found_key.offset = 0;
2345 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); 2358 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2346 BUG_ON(IS_ERR(inode)); 2359 if (IS_ERR(inode)) {
2360 ret = PTR_ERR(inode);
2361 goto out;
2362 }
2347 2363
2348 /* 2364 /*
2349 * add this inode to the orphan list so btrfs_orphan_del does 2365 * add this inode to the orphan list so btrfs_orphan_del does
@@ -2361,7 +2377,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2361 */ 2377 */
2362 if (is_bad_inode(inode)) { 2378 if (is_bad_inode(inode)) {
2363 trans = btrfs_start_transaction(root, 0); 2379 trans = btrfs_start_transaction(root, 0);
2364 BUG_ON(IS_ERR(trans)); 2380 if (IS_ERR(trans)) {
2381 ret = PTR_ERR(trans);
2382 goto out;
2383 }
2365 btrfs_orphan_del(trans, inode); 2384 btrfs_orphan_del(trans, inode);
2366 btrfs_end_transaction(trans, root); 2385 btrfs_end_transaction(trans, root);
2367 iput(inode); 2386 iput(inode);
@@ -2370,17 +2389,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2370 2389
2371 /* if we have links, this was a truncate, lets do that */ 2390 /* if we have links, this was a truncate, lets do that */
2372 if (inode->i_nlink) { 2391 if (inode->i_nlink) {
2392 if (!S_ISREG(inode->i_mode)) {
2393 WARN_ON(1);
2394 iput(inode);
2395 continue;
2396 }
2373 nr_truncate++; 2397 nr_truncate++;
2374 btrfs_truncate(inode); 2398 ret = btrfs_truncate(inode);
2375 } else { 2399 } else {
2376 nr_unlink++; 2400 nr_unlink++;
2377 } 2401 }
2378 2402
2379 /* this will do delete_inode and everything for us */ 2403 /* this will do delete_inode and everything for us */
2380 iput(inode); 2404 iput(inode);
2405 if (ret)
2406 goto out;
2381 } 2407 }
2382 btrfs_free_path(path);
2383
2384 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; 2408 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
2385 2409
2386 if (root->orphan_block_rsv) 2410 if (root->orphan_block_rsv)
@@ -2389,14 +2413,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2389 2413
2390 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2414 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2391 trans = btrfs_join_transaction(root, 1); 2415 trans = btrfs_join_transaction(root, 1);
2392 BUG_ON(IS_ERR(trans)); 2416 if (!IS_ERR(trans))
2393 btrfs_end_transaction(trans, root); 2417 btrfs_end_transaction(trans, root);
2394 } 2418 }
2395 2419
2396 if (nr_unlink) 2420 if (nr_unlink)
2397 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); 2421 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
2398 if (nr_truncate) 2422 if (nr_truncate)
2399 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); 2423 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
2424
2425out:
2426 if (ret)
2427 printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
2428 btrfs_free_path(path);
2429 return ret;
2400} 2430}
2401 2431
2402/* 2432/*
@@ -2507,6 +2537,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2507 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2537 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2508 2538
2509 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2539 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2540 if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
2541 inode->i_mapping->flags &= ~__GFP_FS;
2510 2542
2511 /* 2543 /*
2512 * try to precache a NULL acl entry for files that don't have 2544 * try to precache a NULL acl entry for files that don't have
@@ -2635,10 +2667,10 @@ failed:
2635 * recovery code. It remove a link in a directory with a given name, and 2667 * recovery code. It remove a link in a directory with a given name, and
2636 * also drops the back refs in the inode to the directory 2668 * also drops the back refs in the inode to the directory
2637 */ 2669 */
2638int btrfs_unlink_inode(struct btrfs_trans_handle *trans, 2670static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2639 struct btrfs_root *root, 2671 struct btrfs_root *root,
2640 struct inode *dir, struct inode *inode, 2672 struct inode *dir, struct inode *inode,
2641 const char *name, int name_len) 2673 const char *name, int name_len)
2642{ 2674{
2643 struct btrfs_path *path; 2675 struct btrfs_path *path;
2644 int ret = 0; 2676 int ret = 0;
@@ -2710,12 +2742,25 @@ err:
2710 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 2742 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
2711 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; 2743 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2712 btrfs_update_inode(trans, root, dir); 2744 btrfs_update_inode(trans, root, dir);
2713 btrfs_drop_nlink(inode);
2714 ret = btrfs_update_inode(trans, root, inode);
2715out: 2745out:
2716 return ret; 2746 return ret;
2717} 2747}
2718 2748
2749int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2750 struct btrfs_root *root,
2751 struct inode *dir, struct inode *inode,
2752 const char *name, int name_len)
2753{
2754 int ret;
2755 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
2756 if (!ret) {
2757 btrfs_drop_nlink(inode);
2758 ret = btrfs_update_inode(trans, root, inode);
2759 }
2760 return ret;
2761}
2762
2763
2719/* helper to check if there is any shared block in the path */ 2764/* helper to check if there is any shared block in the path */
2720static int check_path_shared(struct btrfs_root *root, 2765static int check_path_shared(struct btrfs_root *root,
2721 struct btrfs_path *path) 2766 struct btrfs_path *path)
@@ -3537,7 +3582,13 @@ out:
3537 return ret; 3582 return ret;
3538} 3583}
3539 3584
3540int btrfs_cont_expand(struct inode *inode, loff_t size) 3585/*
3586 * This function puts in dummy file extents for the area we're creating a hole
3587 * for. So if we are truncating this file to a larger size we need to insert
3588 * these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for
3589 * the range between oldsize and size
3590 */
3591int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3541{ 3592{
3542 struct btrfs_trans_handle *trans; 3593 struct btrfs_trans_handle *trans;
3543 struct btrfs_root *root = BTRFS_I(inode)->root; 3594 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3545,7 +3596,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3545 struct extent_map *em = NULL; 3596 struct extent_map *em = NULL;
3546 struct extent_state *cached_state = NULL; 3597 struct extent_state *cached_state = NULL;
3547 u64 mask = root->sectorsize - 1; 3598 u64 mask = root->sectorsize - 1;
3548 u64 hole_start = (inode->i_size + mask) & ~mask; 3599 u64 hole_start = (oldsize + mask) & ~mask;
3549 u64 block_end = (size + mask) & ~mask; 3600 u64 block_end = (size + mask) & ~mask;
3550 u64 last_byte; 3601 u64 last_byte;
3551 u64 cur_offset; 3602 u64 cur_offset;
@@ -3590,13 +3641,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3590 err = btrfs_drop_extents(trans, inode, cur_offset, 3641 err = btrfs_drop_extents(trans, inode, cur_offset,
3591 cur_offset + hole_size, 3642 cur_offset + hole_size,
3592 &hint_byte, 1); 3643 &hint_byte, 1);
3593 BUG_ON(err); 3644 if (err)
3645 break;
3594 3646
3595 err = btrfs_insert_file_extent(trans, root, 3647 err = btrfs_insert_file_extent(trans, root,
3596 inode->i_ino, cur_offset, 0, 3648 inode->i_ino, cur_offset, 0,
3597 0, hole_size, 0, hole_size, 3649 0, hole_size, 0, hole_size,
3598 0, 0, 0); 3650 0, 0, 0);
3599 BUG_ON(err); 3651 if (err)
3652 break;
3600 3653
3601 btrfs_drop_extent_cache(inode, hole_start, 3654 btrfs_drop_extent_cache(inode, hole_start,
3602 last_byte - 1, 0); 3655 last_byte - 1, 0);
@@ -3616,81 +3669,41 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3616 return err; 3669 return err;
3617} 3670}
3618 3671
3619static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) 3672static int btrfs_setsize(struct inode *inode, loff_t newsize)
3620{ 3673{
3621 struct btrfs_root *root = BTRFS_I(inode)->root; 3674 loff_t oldsize = i_size_read(inode);
3622 struct btrfs_trans_handle *trans;
3623 unsigned long nr;
3624 int ret; 3675 int ret;
3625 3676
3626 if (attr->ia_size == inode->i_size) 3677 if (newsize == oldsize)
3627 return 0; 3678 return 0;
3628 3679
3629 if (attr->ia_size > inode->i_size) { 3680 if (newsize > oldsize) {
3630 unsigned long limit; 3681 i_size_write(inode, newsize);
3631 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 3682 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3632 if (attr->ia_size > inode->i_sb->s_maxbytes) 3683 truncate_pagecache(inode, oldsize, newsize);
3633 return -EFBIG; 3684 ret = btrfs_cont_expand(inode, oldsize, newsize);
3634 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3635 send_sig(SIGXFSZ, current, 0);
3636 return -EFBIG;
3637 }
3638 }
3639
3640 trans = btrfs_start_transaction(root, 5);
3641 if (IS_ERR(trans))
3642 return PTR_ERR(trans);
3643
3644 btrfs_set_trans_block_group(trans, inode);
3645
3646 ret = btrfs_orphan_add(trans, inode);
3647 BUG_ON(ret);
3648
3649 nr = trans->blocks_used;
3650 btrfs_end_transaction(trans, root);
3651 btrfs_btree_balance_dirty(root, nr);
3652
3653 if (attr->ia_size > inode->i_size) {
3654 ret = btrfs_cont_expand(inode, attr->ia_size);
3655 if (ret) { 3685 if (ret) {
3656 btrfs_truncate(inode); 3686 btrfs_setsize(inode, oldsize);
3657 return ret; 3687 return ret;
3658 } 3688 }
3659 3689
3660 i_size_write(inode, attr->ia_size); 3690 mark_inode_dirty(inode);
3661 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 3691 } else {
3662 3692
3663 trans = btrfs_start_transaction(root, 0); 3693 /*
3664 BUG_ON(IS_ERR(trans)); 3694 * We're truncating a file that used to have good data down to
3665 btrfs_set_trans_block_group(trans, inode); 3695 * zero. Make sure it gets into the ordered flush list so that
3666 trans->block_rsv = root->orphan_block_rsv; 3696 * any new writes get down to disk quickly.
3667 BUG_ON(!trans->block_rsv); 3697 */
3698 if (newsize == 0)
3699 BTRFS_I(inode)->ordered_data_close = 1;
3668 3700
3669 ret = btrfs_update_inode(trans, root, inode); 3701 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3670 BUG_ON(ret); 3702 truncate_setsize(inode, newsize);
3671 if (inode->i_nlink > 0) { 3703 ret = btrfs_truncate(inode);
3672 ret = btrfs_orphan_del(trans, inode);
3673 BUG_ON(ret);
3674 }
3675 nr = trans->blocks_used;
3676 btrfs_end_transaction(trans, root);
3677 btrfs_btree_balance_dirty(root, nr);
3678 return 0;
3679 } 3704 }
3680 3705
3681 /* 3706 return ret;
3682 * We're truncating a file that used to have good data down to
3683 * zero. Make sure it gets into the ordered flush list so that
3684 * any new writes get down to disk quickly.
3685 */
3686 if (attr->ia_size == 0)
3687 BTRFS_I(inode)->ordered_data_close = 1;
3688
3689 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3690 ret = vmtruncate(inode, attr->ia_size);
3691 BUG_ON(ret);
3692
3693 return 0;
3694} 3707}
3695 3708
3696static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3709static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -3707,7 +3720,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3707 return err; 3720 return err;
3708 3721
3709 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3722 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3710 err = btrfs_setattr_size(inode, attr); 3723 err = btrfs_setsize(inode, attr->ia_size);
3711 if (err) 3724 if (err)
3712 return err; 3725 return err;
3713 } 3726 }
@@ -3730,6 +3743,8 @@ void btrfs_evict_inode(struct inode *inode)
3730 unsigned long nr; 3743 unsigned long nr;
3731 int ret; 3744 int ret;
3732 3745
3746 trace_btrfs_inode_evict(inode);
3747
3733 truncate_inode_pages(&inode->i_data, 0); 3748 truncate_inode_pages(&inode->i_data, 0);
3734 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || 3749 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3735 root == root->fs_info->tree_root)) 3750 root == root->fs_info->tree_root))
@@ -4072,7 +4087,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
4072 BTRFS_I(inode)->root = root; 4087 BTRFS_I(inode)->root = root;
4073 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 4088 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
4074 btrfs_read_locked_inode(inode); 4089 btrfs_read_locked_inode(inode);
4075
4076 inode_tree_add(inode); 4090 inode_tree_add(inode);
4077 unlock_new_inode(inode); 4091 unlock_new_inode(inode);
4078 if (new) 4092 if (new)
@@ -4147,8 +4161,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4147 if (!IS_ERR(inode) && root != sub_root) { 4161 if (!IS_ERR(inode) && root != sub_root) {
4148 down_read(&root->fs_info->cleanup_work_sem); 4162 down_read(&root->fs_info->cleanup_work_sem);
4149 if (!(inode->i_sb->s_flags & MS_RDONLY)) 4163 if (!(inode->i_sb->s_flags & MS_RDONLY))
4150 btrfs_orphan_cleanup(sub_root); 4164 ret = btrfs_orphan_cleanup(sub_root);
4151 up_read(&root->fs_info->cleanup_work_sem); 4165 up_read(&root->fs_info->cleanup_work_sem);
4166 if (ret)
4167 inode = ERR_PTR(ret);
4152 } 4168 }
4153 4169
4154 return inode; 4170 return inode;
@@ -4282,6 +4298,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4282 while (di_cur < di_total) { 4298 while (di_cur < di_total) {
4283 struct btrfs_key location; 4299 struct btrfs_key location;
4284 4300
4301 if (verify_dir_item(root, leaf, di))
4302 break;
4303
4285 name_len = btrfs_dir_name_len(leaf, di); 4304 name_len = btrfs_dir_name_len(leaf, di);
4286 if (name_len <= sizeof(tmp_name)) { 4305 if (name_len <= sizeof(tmp_name)) {
4287 name_ptr = tmp_name; 4306 name_ptr = tmp_name;
@@ -4517,6 +4536,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4517 return ERR_PTR(-ENOMEM); 4536 return ERR_PTR(-ENOMEM);
4518 4537
4519 if (dir) { 4538 if (dir) {
4539 trace_btrfs_inode_request(dir);
4540
4520 ret = btrfs_set_inode_index(dir, index); 4541 ret = btrfs_set_inode_index(dir, index);
4521 if (ret) { 4542 if (ret) {
4522 iput(inode); 4543 iput(inode);
@@ -4585,12 +4606,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4585 if ((mode & S_IFREG)) { 4606 if ((mode & S_IFREG)) {
4586 if (btrfs_test_opt(root, NODATASUM)) 4607 if (btrfs_test_opt(root, NODATASUM))
4587 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 4608 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
4588 if (btrfs_test_opt(root, NODATACOW)) 4609 if (btrfs_test_opt(root, NODATACOW) ||
4610 (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
4589 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 4611 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
4590 } 4612 }
4591 4613
4592 insert_inode_hash(inode); 4614 insert_inode_hash(inode);
4593 inode_tree_add(inode); 4615 inode_tree_add(inode);
4616
4617 trace_btrfs_inode_new(inode);
4618
4594 return inode; 4619 return inode;
4595fail: 4620fail:
4596 if (dir) 4621 if (dir)
@@ -4809,7 +4834,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4809 4834
4810 /* do not allow sys_link's with other subvols of the same device */ 4835 /* do not allow sys_link's with other subvols of the same device */
4811 if (root->objectid != BTRFS_I(inode)->root->objectid) 4836 if (root->objectid != BTRFS_I(inode)->root->objectid)
4812 return -EPERM; 4837 return -EXDEV;
4838
4839 if (inode->i_nlink == ~0U)
4840 return -EMLINK;
4813 4841
4814 btrfs_inc_nlink(inode); 4842 btrfs_inc_nlink(inode);
4815 inode->i_ctime = CURRENT_TIME; 4843 inode->i_ctime = CURRENT_TIME;
@@ -5265,6 +5293,9 @@ insert:
5265 } 5293 }
5266 write_unlock(&em_tree->lock); 5294 write_unlock(&em_tree->lock);
5267out: 5295out:
5296
5297 trace_btrfs_get_extent(root, em);
5298
5268 if (path) 5299 if (path)
5269 btrfs_free_path(path); 5300 btrfs_free_path(path);
5270 if (trans) { 5301 if (trans) {
@@ -5748,6 +5779,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5748 5779
5749 kfree(dip->csums); 5780 kfree(dip->csums);
5750 kfree(dip); 5781 kfree(dip);
5782
5783 /* If we had a csum failure make sure to clear the uptodate flag */
5784 if (err)
5785 clear_bit(BIO_UPTODATE, &bio->bi_flags);
5751 dio_end_io(bio, err); 5786 dio_end_io(bio, err);
5752} 5787}
5753 5788
@@ -5849,6 +5884,10 @@ out_done:
5849 5884
5850 kfree(dip->csums); 5885 kfree(dip->csums);
5851 kfree(dip); 5886 kfree(dip);
5887
5888 /* If we had an error make sure to clear the uptodate flag */
5889 if (err)
5890 clear_bit(BIO_UPTODATE, &bio->bi_flags);
5852 dio_end_io(bio, err); 5891 dio_end_io(bio, err);
5853} 5892}
5854 5893
@@ -5922,9 +5961,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5922 __btrfs_submit_bio_start_direct_io, 5961 __btrfs_submit_bio_start_direct_io,
5923 __btrfs_submit_bio_done); 5962 __btrfs_submit_bio_done);
5924 goto err; 5963 goto err;
5925 } else if (!skip_sum) 5964 } else if (!skip_sum) {
5926 btrfs_lookup_bio_sums_dio(root, inode, bio, 5965 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5927 file_offset, csums); 5966 file_offset, csums);
5967 if (ret)
5968 goto err;
5969 }
5928 5970
5929 ret = btrfs_map_bio(root, rw, bio, 0, 1); 5971 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5930err: 5972err:
@@ -5948,6 +5990,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5948 int nr_pages = 0; 5990 int nr_pages = 0;
5949 u32 *csums = dip->csums; 5991 u32 *csums = dip->csums;
5950 int ret = 0; 5992 int ret = 0;
5993 int write = rw & REQ_WRITE;
5951 5994
5952 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); 5995 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5953 if (!bio) 5996 if (!bio)
@@ -5984,7 +6027,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5984 goto out_err; 6027 goto out_err;
5985 } 6028 }
5986 6029
5987 if (!skip_sum) 6030 /* Write's use the ordered csums */
6031 if (!write && !skip_sum)
5988 csums = csums + nr_pages; 6032 csums = csums + nr_pages;
5989 start_sector += submit_len >> 9; 6033 start_sector += submit_len >> 9;
5990 file_offset += submit_len; 6034 file_offset += submit_len;
@@ -6052,7 +6096,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6052 } 6096 }
6053 dip->csums = NULL; 6097 dip->csums = NULL;
6054 6098
6055 if (!skip_sum) { 6099 /* Write's use the ordered csum stuff, so we don't need dip->csums */
6100 if (!write && !skip_sum) {
6056 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6101 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6057 if (!dip->csums) { 6102 if (!dip->csums) {
6058 kfree(dip); 6103 kfree(dip);
@@ -6474,28 +6519,42 @@ out:
6474 return ret; 6519 return ret;
6475} 6520}
6476 6521
6477static void btrfs_truncate(struct inode *inode) 6522static int btrfs_truncate(struct inode *inode)
6478{ 6523{
6479 struct btrfs_root *root = BTRFS_I(inode)->root; 6524 struct btrfs_root *root = BTRFS_I(inode)->root;
6480 int ret; 6525 int ret;
6526 int err = 0;
6481 struct btrfs_trans_handle *trans; 6527 struct btrfs_trans_handle *trans;
6482 unsigned long nr; 6528 unsigned long nr;
6483 u64 mask = root->sectorsize - 1; 6529 u64 mask = root->sectorsize - 1;
6484 6530
6485 if (!S_ISREG(inode->i_mode)) {
6486 WARN_ON(1);
6487 return;
6488 }
6489
6490 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 6531 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
6491 if (ret) 6532 if (ret)
6492 return; 6533 return ret;
6493 6534
6494 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6535 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
6495 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6536 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
6496 6537
6538 trans = btrfs_start_transaction(root, 5);
6539 if (IS_ERR(trans))
6540 return PTR_ERR(trans);
6541
6542 btrfs_set_trans_block_group(trans, inode);
6543
6544 ret = btrfs_orphan_add(trans, inode);
6545 if (ret) {
6546 btrfs_end_transaction(trans, root);
6547 return ret;
6548 }
6549
6550 nr = trans->blocks_used;
6551 btrfs_end_transaction(trans, root);
6552 btrfs_btree_balance_dirty(root, nr);
6553
6554 /* Now start a transaction for the truncate */
6497 trans = btrfs_start_transaction(root, 0); 6555 trans = btrfs_start_transaction(root, 0);
6498 BUG_ON(IS_ERR(trans)); 6556 if (IS_ERR(trans))
6557 return PTR_ERR(trans);
6499 btrfs_set_trans_block_group(trans, inode); 6558 btrfs_set_trans_block_group(trans, inode);
6500 trans->block_rsv = root->orphan_block_rsv; 6559 trans->block_rsv = root->orphan_block_rsv;
6501 6560
@@ -6522,29 +6581,38 @@ static void btrfs_truncate(struct inode *inode)
6522 while (1) { 6581 while (1) {
6523 if (!trans) { 6582 if (!trans) {
6524 trans = btrfs_start_transaction(root, 0); 6583 trans = btrfs_start_transaction(root, 0);
6525 BUG_ON(IS_ERR(trans)); 6584 if (IS_ERR(trans))
6585 return PTR_ERR(trans);
6526 btrfs_set_trans_block_group(trans, inode); 6586 btrfs_set_trans_block_group(trans, inode);
6527 trans->block_rsv = root->orphan_block_rsv; 6587 trans->block_rsv = root->orphan_block_rsv;
6528 } 6588 }
6529 6589
6530 ret = btrfs_block_rsv_check(trans, root, 6590 ret = btrfs_block_rsv_check(trans, root,
6531 root->orphan_block_rsv, 0, 5); 6591 root->orphan_block_rsv, 0, 5);
6532 if (ret) { 6592 if (ret == -EAGAIN) {
6533 BUG_ON(ret != -EAGAIN);
6534 ret = btrfs_commit_transaction(trans, root); 6593 ret = btrfs_commit_transaction(trans, root);
6535 BUG_ON(ret); 6594 if (ret)
6595 return ret;
6536 trans = NULL; 6596 trans = NULL;
6537 continue; 6597 continue;
6598 } else if (ret) {
6599 err = ret;
6600 break;
6538 } 6601 }
6539 6602
6540 ret = btrfs_truncate_inode_items(trans, root, inode, 6603 ret = btrfs_truncate_inode_items(trans, root, inode,
6541 inode->i_size, 6604 inode->i_size,
6542 BTRFS_EXTENT_DATA_KEY); 6605 BTRFS_EXTENT_DATA_KEY);
6543 if (ret != -EAGAIN) 6606 if (ret != -EAGAIN) {
6607 err = ret;
6544 break; 6608 break;
6609 }
6545 6610
6546 ret = btrfs_update_inode(trans, root, inode); 6611 ret = btrfs_update_inode(trans, root, inode);
6547 BUG_ON(ret); 6612 if (ret) {
6613 err = ret;
6614 break;
6615 }
6548 6616
6549 nr = trans->blocks_used; 6617 nr = trans->blocks_used;
6550 btrfs_end_transaction(trans, root); 6618 btrfs_end_transaction(trans, root);
@@ -6554,16 +6622,27 @@ static void btrfs_truncate(struct inode *inode)
6554 6622
6555 if (ret == 0 && inode->i_nlink > 0) { 6623 if (ret == 0 && inode->i_nlink > 0) {
6556 ret = btrfs_orphan_del(trans, inode); 6624 ret = btrfs_orphan_del(trans, inode);
6557 BUG_ON(ret); 6625 if (ret)
6626 err = ret;
6627 } else if (ret && inode->i_nlink > 0) {
6628 /*
6629 * Failed to do the truncate, remove us from the in memory
6630 * orphan list.
6631 */
6632 ret = btrfs_orphan_del(NULL, inode);
6558 } 6633 }
6559 6634
6560 ret = btrfs_update_inode(trans, root, inode); 6635 ret = btrfs_update_inode(trans, root, inode);
6561 BUG_ON(ret); 6636 if (ret && !err)
6637 err = ret;
6562 6638
6563 nr = trans->blocks_used; 6639 nr = trans->blocks_used;
6564 ret = btrfs_end_transaction_throttle(trans, root); 6640 ret = btrfs_end_transaction_throttle(trans, root);
6565 BUG_ON(ret); 6641 if (ret && !err)
6642 err = ret;
6566 btrfs_btree_balance_dirty(root, nr); 6643 btrfs_btree_balance_dirty(root, nr);
6644
6645 return err;
6567} 6646}
6568 6647
6569/* 6648/*
@@ -6630,9 +6709,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6630 ei->index_cnt = (u64)-1; 6709 ei->index_cnt = (u64)-1;
6631 ei->last_unlink_trans = 0; 6710 ei->last_unlink_trans = 0;
6632 6711
6633 spin_lock_init(&ei->accounting_lock);
6634 atomic_set(&ei->outstanding_extents, 0); 6712 atomic_set(&ei->outstanding_extents, 0);
6635 ei->reserved_extents = 0; 6713 atomic_set(&ei->reserved_extents, 0);
6636 6714
6637 ei->ordered_data_close = 0; 6715 ei->ordered_data_close = 0;
6638 ei->orphan_meta_reserved = 0; 6716 ei->orphan_meta_reserved = 0;
@@ -6668,7 +6746,7 @@ void btrfs_destroy_inode(struct inode *inode)
6668 WARN_ON(!list_empty(&inode->i_dentry)); 6746 WARN_ON(!list_empty(&inode->i_dentry));
6669 WARN_ON(inode->i_data.nrpages); 6747 WARN_ON(inode->i_data.nrpages);
6670 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); 6748 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
6671 WARN_ON(BTRFS_I(inode)->reserved_extents); 6749 WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
6672 6750
6673 /* 6751 /*
6674 * This can happen where we create an inode, but somebody else also 6752 * This can happen where we create an inode, but somebody else also
@@ -6760,6 +6838,8 @@ void btrfs_destroy_cachep(void)
6760 kmem_cache_destroy(btrfs_transaction_cachep); 6838 kmem_cache_destroy(btrfs_transaction_cachep);
6761 if (btrfs_path_cachep) 6839 if (btrfs_path_cachep)
6762 kmem_cache_destroy(btrfs_path_cachep); 6840 kmem_cache_destroy(btrfs_path_cachep);
6841 if (btrfs_free_space_cachep)
6842 kmem_cache_destroy(btrfs_free_space_cachep);
6763} 6843}
6764 6844
6765int btrfs_init_cachep(void) 6845int btrfs_init_cachep(void)
@@ -6788,6 +6868,12 @@ int btrfs_init_cachep(void)
6788 if (!btrfs_path_cachep) 6868 if (!btrfs_path_cachep)
6789 goto fail; 6869 goto fail;
6790 6870
6871 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
6872 sizeof(struct btrfs_free_space), 0,
6873 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
6874 if (!btrfs_free_space_cachep)
6875 goto fail;
6876
6791 return 0; 6877 return 0;
6792fail: 6878fail:
6793 btrfs_destroy_cachep(); 6879 btrfs_destroy_cachep();
@@ -6806,6 +6892,26 @@ static int btrfs_getattr(struct vfsmount *mnt,
6806 return 0; 6892 return 0;
6807} 6893}
6808 6894
6895/*
6896 * If a file is moved, it will inherit the cow and compression flags of the new
6897 * directory.
6898 */
6899static void fixup_inode_flags(struct inode *dir, struct inode *inode)
6900{
6901 struct btrfs_inode *b_dir = BTRFS_I(dir);
6902 struct btrfs_inode *b_inode = BTRFS_I(inode);
6903
6904 if (b_dir->flags & BTRFS_INODE_NODATACOW)
6905 b_inode->flags |= BTRFS_INODE_NODATACOW;
6906 else
6907 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
6908
6909 if (b_dir->flags & BTRFS_INODE_COMPRESS)
6910 b_inode->flags |= BTRFS_INODE_COMPRESS;
6911 else
6912 b_inode->flags &= ~BTRFS_INODE_COMPRESS;
6913}
6914
6809static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 6915static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6810 struct inode *new_dir, struct dentry *new_dentry) 6916 struct inode *new_dir, struct dentry *new_dentry)
6811{ 6917{
@@ -6908,11 +7014,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6908 old_dentry->d_name.name, 7014 old_dentry->d_name.name,
6909 old_dentry->d_name.len); 7015 old_dentry->d_name.len);
6910 } else { 7016 } else {
6911 btrfs_inc_nlink(old_dentry->d_inode); 7017 ret = __btrfs_unlink_inode(trans, root, old_dir,
6912 ret = btrfs_unlink_inode(trans, root, old_dir, 7018 old_dentry->d_inode,
6913 old_dentry->d_inode, 7019 old_dentry->d_name.name,
6914 old_dentry->d_name.name, 7020 old_dentry->d_name.len);
6915 old_dentry->d_name.len); 7021 if (!ret)
7022 ret = btrfs_update_inode(trans, root, old_inode);
6916 } 7023 }
6917 BUG_ON(ret); 7024 BUG_ON(ret);
6918 7025
@@ -6939,6 +7046,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6939 } 7046 }
6940 } 7047 }
6941 7048
7049 fixup_inode_flags(new_dir, old_inode);
7050
6942 ret = btrfs_add_link(trans, new_dir, old_inode, 7051 ret = btrfs_add_link(trans, new_dir, old_inode,
6943 new_dentry->d_name.name, 7052 new_dentry->d_name.name,
6944 new_dentry->d_name.len, 0, index); 7053 new_dentry->d_name.len, 0, index);
@@ -7340,7 +7449,6 @@ static const struct address_space_operations btrfs_aops = {
7340 .writepage = btrfs_writepage, 7449 .writepage = btrfs_writepage,
7341 .writepages = btrfs_writepages, 7450 .writepages = btrfs_writepages,
7342 .readpages = btrfs_readpages, 7451 .readpages = btrfs_readpages,
7343 .sync_page = block_sync_page,
7344 .direct_IO = btrfs_direct_IO, 7452 .direct_IO = btrfs_direct_IO,
7345 .invalidatepage = btrfs_invalidatepage, 7453 .invalidatepage = btrfs_invalidatepage,
7346 .releasepage = btrfs_releasepage, 7454 .releasepage = btrfs_releasepage,
@@ -7356,7 +7464,6 @@ static const struct address_space_operations btrfs_symlink_aops = {
7356}; 7464};
7357 7465
7358static const struct inode_operations btrfs_file_inode_operations = { 7466static const struct inode_operations btrfs_file_inode_operations = {
7359 .truncate = btrfs_truncate,
7360 .getattr = btrfs_getattr, 7467 .getattr = btrfs_getattr,
7361 .setattr = btrfs_setattr, 7468 .setattr = btrfs_setattr,
7362 .setxattr = btrfs_setxattr, 7469 .setxattr = btrfs_setxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5fdb2abc4fa7..7c07fe26b7cf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -40,6 +40,7 @@
40#include <linux/xattr.h> 40#include <linux/xattr.h>
41#include <linux/vmalloc.h> 41#include <linux/vmalloc.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/blkdev.h>
43#include "compat.h" 44#include "compat.h"
44#include "ctree.h" 45#include "ctree.h"
45#include "disk-io.h" 46#include "disk-io.h"
@@ -138,6 +139,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
138 return 0; 139 return 0;
139} 140}
140 141
142static int check_flags(unsigned int flags)
143{
144 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
145 FS_NOATIME_FL | FS_NODUMP_FL | \
146 FS_SYNC_FL | FS_DIRSYNC_FL | \
147 FS_NOCOMP_FL | FS_COMPR_FL | \
148 FS_NOCOW_FL | FS_COW_FL))
149 return -EOPNOTSUPP;
150
151 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
152 return -EINVAL;
153
154 if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
155 return -EINVAL;
156
157 return 0;
158}
159
141static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 160static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
142{ 161{
143 struct inode *inode = file->f_path.dentry->d_inode; 162 struct inode *inode = file->f_path.dentry->d_inode;
@@ -153,12 +172,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
153 if (copy_from_user(&flags, arg, sizeof(flags))) 172 if (copy_from_user(&flags, arg, sizeof(flags)))
154 return -EFAULT; 173 return -EFAULT;
155 174
156 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 175 ret = check_flags(flags);
157 FS_NOATIME_FL | FS_NODUMP_FL | \ 176 if (ret)
158 FS_SYNC_FL | FS_DIRSYNC_FL)) 177 return ret;
159 return -EOPNOTSUPP;
160 178
161 if (!is_owner_or_cap(inode)) 179 if (!inode_owner_or_capable(inode))
162 return -EACCES; 180 return -EACCES;
163 181
164 mutex_lock(&inode->i_mutex); 182 mutex_lock(&inode->i_mutex);
@@ -201,6 +219,22 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
201 else 219 else
202 ip->flags &= ~BTRFS_INODE_DIRSYNC; 220 ip->flags &= ~BTRFS_INODE_DIRSYNC;
203 221
222 /*
223 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
224 * flag may be changed automatically if compression code won't make
225 * things smaller.
226 */
227 if (flags & FS_NOCOMP_FL) {
228 ip->flags &= ~BTRFS_INODE_COMPRESS;
229 ip->flags |= BTRFS_INODE_NOCOMPRESS;
230 } else if (flags & FS_COMPR_FL) {
231 ip->flags |= BTRFS_INODE_COMPRESS;
232 ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
233 }
234 if (flags & FS_NOCOW_FL)
235 ip->flags |= BTRFS_INODE_NODATACOW;
236 else if (flags & FS_COW_FL)
237 ip->flags &= ~BTRFS_INODE_NODATACOW;
204 238
205 trans = btrfs_join_transaction(root, 1); 239 trans = btrfs_join_transaction(root, 1);
206 BUG_ON(IS_ERR(trans)); 240 BUG_ON(IS_ERR(trans));
@@ -213,9 +247,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
213 btrfs_end_transaction(trans, root); 247 btrfs_end_transaction(trans, root);
214 248
215 mnt_drop_write(file->f_path.mnt); 249 mnt_drop_write(file->f_path.mnt);
250
251 ret = 0;
216 out_unlock: 252 out_unlock:
217 mutex_unlock(&inode->i_mutex); 253 mutex_unlock(&inode->i_mutex);
218 return 0; 254 return ret;
219} 255}
220 256
221static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 257static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
@@ -225,6 +261,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
225 return put_user(inode->i_generation, arg); 261 return put_user(inode->i_generation, arg);
226} 262}
227 263
264static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
265{
266 struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
267 struct btrfs_fs_info *fs_info = root->fs_info;
268 struct btrfs_device *device;
269 struct request_queue *q;
270 struct fstrim_range range;
271 u64 minlen = ULLONG_MAX;
272 u64 num_devices = 0;
273 int ret;
274
275 if (!capable(CAP_SYS_ADMIN))
276 return -EPERM;
277
278 mutex_lock(&fs_info->fs_devices->device_list_mutex);
279 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
280 if (!device->bdev)
281 continue;
282 q = bdev_get_queue(device->bdev);
283 if (blk_queue_discard(q)) {
284 num_devices++;
285 minlen = min((u64)q->limits.discard_granularity,
286 minlen);
287 }
288 }
289 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
290 if (!num_devices)
291 return -EOPNOTSUPP;
292
293 if (copy_from_user(&range, arg, sizeof(range)))
294 return -EFAULT;
295
296 range.minlen = max(range.minlen, minlen);
297 ret = btrfs_trim_fs(root, &range);
298 if (ret < 0)
299 return ret;
300
301 if (copy_to_user(arg, &range, sizeof(range)))
302 return -EFAULT;
303
304 return 0;
305}
306
228static noinline int create_subvol(struct btrfs_root *root, 307static noinline int create_subvol(struct btrfs_root *root,
229 struct dentry *dentry, 308 struct dentry *dentry,
230 char *name, int namelen, 309 char *name, int namelen,
@@ -409,7 +488,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
409 if (ret) 488 if (ret)
410 goto fail; 489 goto fail;
411 490
412 btrfs_orphan_cleanup(pending_snapshot->snap); 491 ret = btrfs_orphan_cleanup(pending_snapshot->snap);
492 if (ret)
493 goto fail;
413 494
414 parent = dget_parent(dentry); 495 parent = dget_parent(dentry);
415 inode = btrfs_lookup_dentry(parent->d_inode, dentry); 496 inode = btrfs_lookup_dentry(parent->d_inode, dentry);
@@ -1077,7 +1158,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1158 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1159 return -EOPNOTSUPP;
1079 1160
1080 if (!is_owner_or_cap(inode)) 1161 if (!inode_owner_or_capable(inode))
1081 return -EACCES; 1162 return -EACCES;
1082 1163
1083 down_write(&root->fs_info->subvol_sem); 1164 down_write(&root->fs_info->subvol_sem);
@@ -2348,12 +2429,15 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2348 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; 2429 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2349 struct btrfs_trans_handle *trans; 2430 struct btrfs_trans_handle *trans;
2350 u64 transid; 2431 u64 transid;
2432 int ret;
2351 2433
2352 trans = btrfs_start_transaction(root, 0); 2434 trans = btrfs_start_transaction(root, 0);
2353 if (IS_ERR(trans)) 2435 if (IS_ERR(trans))
2354 return PTR_ERR(trans); 2436 return PTR_ERR(trans);
2355 transid = trans->transid; 2437 transid = trans->transid;
2356 btrfs_commit_transaction_async(trans, root, 0); 2438 ret = btrfs_commit_transaction_async(trans, root, 0);
2439 if (ret)
2440 return ret;
2357 2441
2358 if (argp) 2442 if (argp)
2359 if (copy_to_user(argp, &transid, sizeof(transid))) 2443 if (copy_to_user(argp, &transid, sizeof(transid)))
@@ -2388,6 +2472,8 @@ long btrfs_ioctl(struct file *file, unsigned int
2388 return btrfs_ioctl_setflags(file, argp); 2472 return btrfs_ioctl_setflags(file, argp);
2389 case FS_IOC_GETVERSION: 2473 case FS_IOC_GETVERSION:
2390 return btrfs_ioctl_getversion(file, argp); 2474 return btrfs_ioctl_getversion(file, argp);
2475 case FITRIM:
2476 return btrfs_ioctl_fitrim(file, argp);
2391 case BTRFS_IOC_SNAP_CREATE: 2477 case BTRFS_IOC_SNAP_CREATE:
2392 return btrfs_ioctl_snap_create(file, argp, 0); 2478 return btrfs_ioctl_snap_create(file, argp, 0);
2393 case BTRFS_IOC_SNAP_CREATE_V2: 2479 case BTRFS_IOC_SNAP_CREATE_V2:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 083a55477375..a1c940425307 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
202 INIT_LIST_HEAD(&entry->list); 202 INIT_LIST_HEAD(&entry->list);
203 INIT_LIST_HEAD(&entry->root_extent_list); 203 INIT_LIST_HEAD(&entry->root_extent_list);
204 204
205 trace_btrfs_ordered_extent_add(inode, entry);
206
205 spin_lock(&tree->lock); 207 spin_lock(&tree->lock);
206 node = tree_insert(&tree->tree, file_offset, 208 node = tree_insert(&tree->tree, file_offset,
207 &entry->rb_node); 209 &entry->rb_node);
@@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
387 struct list_head *cur; 389 struct list_head *cur;
388 struct btrfs_ordered_sum *sum; 390 struct btrfs_ordered_sum *sum;
389 391
392 trace_btrfs_ordered_extent_put(entry->inode, entry);
393
390 if (atomic_dec_and_test(&entry->refs)) { 394 if (atomic_dec_and_test(&entry->refs)) {
391 while (!list_empty(&entry->list)) { 395 while (!list_empty(&entry->list)) {
392 cur = entry->list.next; 396 cur = entry->list.next;
@@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
420 spin_lock(&root->fs_info->ordered_extent_lock); 424 spin_lock(&root->fs_info->ordered_extent_lock);
421 list_del_init(&entry->root_extent_list); 425 list_del_init(&entry->root_extent_list);
422 426
427 trace_btrfs_ordered_extent_remove(inode, entry);
428
423 /* 429 /*
424 * we have no more ordered extents for this inode and 430 * we have no more ordered extents for this inode and
425 * no dirty pages. We can safely remove it from the 431 * no dirty pages. We can safely remove it from the
@@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
585 u64 start = entry->file_offset; 591 u64 start = entry->file_offset;
586 u64 end = start + entry->len - 1; 592 u64 end = start + entry->len - 1;
587 593
594 trace_btrfs_ordered_extent_start(inode, entry);
595
588 /* 596 /*
589 * pages in the range can be dirty, clean or writeback. We 597 * pages in the range can be dirty, clean or writeback. We
590 * start IO on any dirty ones so the wait doesn't stall waiting 598 * start IO on any dirty ones so the wait doesn't stall waiting
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 31ade5802ae8..58250e09eb05 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1724,6 +1724,7 @@ again:
1724 1724
1725 eb = read_tree_block(dest, old_bytenr, blocksize, 1725 eb = read_tree_block(dest, old_bytenr, blocksize,
1726 old_ptr_gen); 1726 old_ptr_gen);
1727 BUG_ON(!eb);
1727 btrfs_tree_lock(eb); 1728 btrfs_tree_lock(eb);
1728 if (cow) { 1729 if (cow) {
1729 ret = btrfs_cow_block(trans, dest, eb, parent, 1730 ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -2513,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2513 blocksize = btrfs_level_size(root, node->level); 2514 blocksize = btrfs_level_size(root, node->level);
2514 generation = btrfs_node_ptr_generation(upper->eb, slot); 2515 generation = btrfs_node_ptr_generation(upper->eb, slot);
2515 eb = read_tree_block(root, bytenr, blocksize, generation); 2516 eb = read_tree_block(root, bytenr, blocksize, generation);
2517 if (!eb) {
2518 err = -EIO;
2519 goto next;
2520 }
2516 btrfs_tree_lock(eb); 2521 btrfs_tree_lock(eb);
2517 btrfs_set_lock_blocking(eb); 2522 btrfs_set_lock_blocking(eb);
2518 2523
@@ -2670,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc,
2670 BUG_ON(block->key_ready); 2675 BUG_ON(block->key_ready);
2671 eb = read_tree_block(rc->extent_root, block->bytenr, 2676 eb = read_tree_block(rc->extent_root, block->bytenr,
2672 block->key.objectid, block->key.offset); 2677 block->key.objectid, block->key.offset);
2678 BUG_ON(!eb);
2673 WARN_ON(btrfs_header_level(eb) != block->level); 2679 WARN_ON(btrfs_header_level(eb) != block->level);
2674 if (block->level == 0) 2680 if (block->level == 0)
2675 btrfs_item_key_to_cpu(eb, &block->key, 0); 2681 btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -4209,7 +4215,7 @@ out:
4209 if (IS_ERR(fs_root)) 4215 if (IS_ERR(fs_root))
4210 err = PTR_ERR(fs_root); 4216 err = PTR_ERR(fs_root);
4211 else 4217 else
4212 btrfs_orphan_cleanup(fs_root); 4218 err = btrfs_orphan_cleanup(fs_root);
4213 } 4219 }
4214 return err; 4220 return err;
4215} 4221}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6a1086e83ffc..29b2d7c930eb 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
88 search_key.offset = (u64)-1; 88 search_key.offset = (u64)-1;
89 89
90 path = btrfs_alloc_path(); 90 path = btrfs_alloc_path();
91 BUG_ON(!path); 91 if (!path)
92 return -ENOMEM;
92 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 93 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
93 if (ret < 0) 94 if (ret < 0)
94 goto out; 95 goto out;
@@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
332 struct extent_buffer *leaf; 333 struct extent_buffer *leaf;
333 334
334 path = btrfs_alloc_path(); 335 path = btrfs_alloc_path();
335 BUG_ON(!path); 336 if (!path)
337 return -ENOMEM;
336 ret = btrfs_search_slot(trans, root, key, path, -1, 1); 338 ret = btrfs_search_slot(trans, root, key, path, -1, 1);
337 if (ret < 0) 339 if (ret < 0)
338 goto out; 340 goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d39a9895d932..2edfc039f098 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,6 +52,9 @@
52#include "export.h" 52#include "export.h"
53#include "compression.h" 53#include "compression.h"
54 54
55#define CREATE_TRACE_POINTS
56#include <trace/events/btrfs.h>
57
55static const struct super_operations btrfs_super_ops; 58static const struct super_operations btrfs_super_ops;
56 59
57static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, 60static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
@@ -620,6 +623,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
620 struct btrfs_root *root = btrfs_sb(sb); 623 struct btrfs_root *root = btrfs_sb(sb);
621 int ret; 624 int ret;
622 625
626 trace_btrfs_sync_fs(wait);
627
623 if (!wait) { 628 if (!wait) {
624 filemap_flush(root->fs_info->btree_inode->i_mapping); 629 filemap_flush(root->fs_info->btree_inode->i_mapping);
625 return 0; 630 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3d73c8d93bbb..ce48eb59d615 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -57,7 +57,8 @@ static noinline int join_transaction(struct btrfs_root *root)
57 if (!cur_trans) { 57 if (!cur_trans) {
58 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 58 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
59 GFP_NOFS); 59 GFP_NOFS);
60 BUG_ON(!cur_trans); 60 if (!cur_trans)
61 return -ENOMEM;
61 root->fs_info->generation++; 62 root->fs_info->generation++;
62 cur_trans->num_writers = 1; 63 cur_trans->num_writers = 1;
63 cur_trans->num_joined = 0; 64 cur_trans->num_joined = 0;
@@ -195,7 +196,11 @@ again:
195 wait_current_trans(root); 196 wait_current_trans(root);
196 197
197 ret = join_transaction(root); 198 ret = join_transaction(root);
198 BUG_ON(ret); 199 if (ret < 0) {
200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret);
203 }
199 204
200 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
201 cur_trans->use_count++; 206 cur_trans->use_count++;
@@ -1156,7 +1161,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1156 struct btrfs_transaction *cur_trans; 1161 struct btrfs_transaction *cur_trans;
1157 1162
1158 ac = kmalloc(sizeof(*ac), GFP_NOFS); 1163 ac = kmalloc(sizeof(*ac), GFP_NOFS);
1159 BUG_ON(!ac); 1164 if (!ac)
1165 return -ENOMEM;
1160 1166
1161 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1167 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1162 ac->root = root; 1168 ac->root = root;
@@ -1389,6 +1395,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1389 put_transaction(cur_trans); 1395 put_transaction(cur_trans);
1390 put_transaction(cur_trans); 1396 put_transaction(cur_trans);
1391 1397
1398 trace_btrfs_transaction_commit(root);
1399
1392 mutex_unlock(&root->fs_info->trans_mutex); 1400 mutex_unlock(&root->fs_info->trans_mutex);
1393 1401
1394 if (current->journal_info == trans) 1402 if (current->journal_info == trans)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a4bbb854dfd2..c50271ad3157 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -799,12 +799,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
799 struct inode *dir; 799 struct inode *dir;
800 int ret; 800 int ret;
801 struct btrfs_inode_ref *ref; 801 struct btrfs_inode_ref *ref;
802 struct btrfs_dir_item *di;
803 struct inode *inode; 802 struct inode *inode;
804 char *name; 803 char *name;
805 int namelen; 804 int namelen;
806 unsigned long ref_ptr; 805 unsigned long ref_ptr;
807 unsigned long ref_end; 806 unsigned long ref_end;
807 int search_done = 0;
808 808
809 /* 809 /*
810 * it is possible that we didn't log all the parent directories 810 * it is possible that we didn't log all the parent directories
@@ -845,7 +845,10 @@ again:
845 * existing back reference, and we don't want to create 845 * existing back reference, and we don't want to create
846 * dangling pointers in the directory. 846 * dangling pointers in the directory.
847 */ 847 */
848conflict_again: 848
849 if (search_done)
850 goto insert;
851
849 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 852 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
850 if (ret == 0) { 853 if (ret == 0) {
851 char *victim_name; 854 char *victim_name;
@@ -886,37 +889,21 @@ conflict_again:
886 ret = btrfs_unlink_inode(trans, root, dir, 889 ret = btrfs_unlink_inode(trans, root, dir,
887 inode, victim_name, 890 inode, victim_name,
888 victim_name_len); 891 victim_name_len);
889 kfree(victim_name);
890 btrfs_release_path(root, path);
891 goto conflict_again;
892 } 892 }
893 kfree(victim_name); 893 kfree(victim_name);
894 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 894 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
895 } 895 }
896 BUG_ON(ret); 896 BUG_ON(ret);
897 }
898 btrfs_release_path(root, path);
899
900 /* look for a conflicting sequence number */
901 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
902 btrfs_inode_ref_index(eb, ref),
903 name, namelen, 0);
904 if (di && !IS_ERR(di)) {
905 ret = drop_one_dir_item(trans, root, path, dir, di);
906 BUG_ON(ret);
907 }
908 btrfs_release_path(root, path);
909 897
910 898 /*
911 /* look for a conflicting name */ 899 * NOTE: we have searched root tree and checked the
912 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 900 * coresponding ref, it does not need to check again.
913 name, namelen, 0); 901 */
914 if (di && !IS_ERR(di)) { 902 search_done = 1;
915 ret = drop_one_dir_item(trans, root, path, dir, di);
916 BUG_ON(ret);
917 } 903 }
918 btrfs_release_path(root, path); 904 btrfs_release_path(root, path);
919 905
906insert:
920 /* insert our name */ 907 /* insert our name */
921 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, 908 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
922 btrfs_inode_ref_index(eb, ref)); 909 btrfs_inode_ref_index(eb, ref));
@@ -1286,6 +1273,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1286 ptr_end = ptr + item_size; 1273 ptr_end = ptr + item_size;
1287 while (ptr < ptr_end) { 1274 while (ptr < ptr_end) {
1288 di = (struct btrfs_dir_item *)ptr; 1275 di = (struct btrfs_dir_item *)ptr;
1276 if (verify_dir_item(root, eb, di))
1277 return -EIO;
1289 name_len = btrfs_dir_name_len(eb, di); 1278 name_len = btrfs_dir_name_len(eb, di);
1290 ret = replay_one_name(trans, root, path, eb, di, key); 1279 ret = replay_one_name(trans, root, path, eb, di, key);
1291 BUG_ON(ret); 1280 BUG_ON(ret);
@@ -1412,6 +1401,11 @@ again:
1412 ptr_end = ptr + item_size; 1401 ptr_end = ptr + item_size;
1413 while (ptr < ptr_end) { 1402 while (ptr < ptr_end) {
1414 di = (struct btrfs_dir_item *)ptr; 1403 di = (struct btrfs_dir_item *)ptr;
1404 if (verify_dir_item(root, eb, di)) {
1405 ret = -EIO;
1406 goto out;
1407 }
1408
1415 name_len = btrfs_dir_name_len(eb, di); 1409 name_len = btrfs_dir_name_len(eb, di);
1416 name = kmalloc(name_len, GFP_NOFS); 1410 name = kmalloc(name_len, GFP_NOFS);
1417 if (!name) { 1411 if (!name) {
@@ -1821,7 +1815,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1821 int orig_level; 1815 int orig_level;
1822 1816
1823 path = btrfs_alloc_path(); 1817 path = btrfs_alloc_path();
1824 BUG_ON(!path); 1818 if (!path)
1819 return -ENOMEM;
1825 1820
1826 level = btrfs_header_level(log->node); 1821 level = btrfs_header_level(log->node);
1827 orig_level = level; 1822 orig_level = level;
@@ -3107,9 +3102,11 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3107 .stage = 0, 3102 .stage = 0,
3108 }; 3103 };
3109 3104
3110 fs_info->log_root_recovering = 1;
3111 path = btrfs_alloc_path(); 3105 path = btrfs_alloc_path();
3112 BUG_ON(!path); 3106 if (!path)
3107 return -ENOMEM;
3108
3109 fs_info->log_root_recovering = 1;
3113 3110
3114 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3111 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3115 BUG_ON(IS_ERR(trans)); 3112 BUG_ON(IS_ERR(trans));
@@ -3117,7 +3114,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3117 wc.trans = trans; 3114 wc.trans = trans;
3118 wc.pin = 1; 3115 wc.pin = 1;
3119 3116
3120 walk_log_tree(trans, log_root_tree, &wc); 3117 ret = walk_log_tree(trans, log_root_tree, &wc);
3118 BUG_ON(ret);
3121 3119
3122again: 3120again:
3123 key.objectid = BTRFS_TREE_LOG_OBJECTID; 3121 key.objectid = BTRFS_TREE_LOG_OBJECTID;
@@ -3141,8 +3139,7 @@ again:
3141 3139
3142 log = btrfs_read_fs_root_no_radix(log_root_tree, 3140 log = btrfs_read_fs_root_no_radix(log_root_tree,
3143 &found_key); 3141 &found_key);
3144 BUG_ON(!log); 3142 BUG_ON(IS_ERR(log));
3145
3146 3143
3147 tmp_key.objectid = found_key.offset; 3144 tmp_key.objectid = found_key.offset;
3148 tmp_key.type = BTRFS_ROOT_ITEM_KEY; 3145 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd13eb81ee40..309a57b9fc85 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -33,17 +33,6 @@
33#include "volumes.h" 33#include "volumes.h"
34#include "async-thread.h" 34#include "async-thread.h"
35 35
36struct map_lookup {
37 u64 type;
38 int io_align;
39 int io_width;
40 int stripe_len;
41 int sector_size;
42 int num_stripes;
43 int sub_stripes;
44 struct btrfs_bio_stripe stripes[];
45};
46
47static int init_first_rw_device(struct btrfs_trans_handle *trans, 36static int init_first_rw_device(struct btrfs_trans_handle *trans,
48 struct btrfs_root *root, 37 struct btrfs_root *root,
49 struct btrfs_device *device); 38 struct btrfs_device *device);
@@ -162,7 +151,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
162 struct bio *cur; 151 struct bio *cur;
163 int again = 0; 152 int again = 0;
164 unsigned long num_run; 153 unsigned long num_run;
165 unsigned long num_sync_run;
166 unsigned long batch_run = 0; 154 unsigned long batch_run = 0;
167 unsigned long limit; 155 unsigned long limit;
168 unsigned long last_waited = 0; 156 unsigned long last_waited = 0;
@@ -173,11 +161,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
173 limit = btrfs_async_submit_limit(fs_info); 161 limit = btrfs_async_submit_limit(fs_info);
174 limit = limit * 2 / 3; 162 limit = limit * 2 / 3;
175 163
176 /* we want to make sure that every time we switch from the sync
177 * list to the normal list, we unplug
178 */
179 num_sync_run = 0;
180
181loop: 164loop:
182 spin_lock(&device->io_lock); 165 spin_lock(&device->io_lock);
183 166
@@ -223,15 +206,6 @@ loop_lock:
223 206
224 spin_unlock(&device->io_lock); 207 spin_unlock(&device->io_lock);
225 208
226 /*
227 * if we're doing the regular priority list, make sure we unplug
228 * for any high prio bios we've sent down
229 */
230 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
231 num_sync_run = 0;
232 blk_run_backing_dev(bdi, NULL);
233 }
234
235 while (pending) { 209 while (pending) {
236 210
237 rmb(); 211 rmb();
@@ -259,19 +233,11 @@ loop_lock:
259 233
260 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 234 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
261 235
262 if (cur->bi_rw & REQ_SYNC)
263 num_sync_run++;
264
265 submit_bio(cur->bi_rw, cur); 236 submit_bio(cur->bi_rw, cur);
266 num_run++; 237 num_run++;
267 batch_run++; 238 batch_run++;
268 if (need_resched()) { 239 if (need_resched())
269 if (num_sync_run) {
270 blk_run_backing_dev(bdi, NULL);
271 num_sync_run = 0;
272 }
273 cond_resched(); 240 cond_resched();
274 }
275 241
276 /* 242 /*
277 * we made progress, there is more work to do and the bdi 243 * we made progress, there is more work to do and the bdi
@@ -304,13 +270,8 @@ loop_lock:
304 * against it before looping 270 * against it before looping
305 */ 271 */
306 last_waited = ioc->last_waited; 272 last_waited = ioc->last_waited;
307 if (need_resched()) { 273 if (need_resched())
308 if (num_sync_run) {
309 blk_run_backing_dev(bdi, NULL);
310 num_sync_run = 0;
311 }
312 cond_resched(); 274 cond_resched();
313 }
314 continue; 275 continue;
315 } 276 }
316 spin_lock(&device->io_lock); 277 spin_lock(&device->io_lock);
@@ -323,22 +284,6 @@ loop_lock:
323 } 284 }
324 } 285 }
325 286
326 if (num_sync_run) {
327 num_sync_run = 0;
328 blk_run_backing_dev(bdi, NULL);
329 }
330 /*
331 * IO has already been through a long path to get here. Checksumming,
332 * async helper threads, perhaps compression. We've done a pretty
333 * good job of collecting a batch of IO and should just unplug
334 * the device right away.
335 *
336 * This will help anyone who is waiting on the IO, they might have
337 * already unplugged, but managed to do so before the bio they
338 * cared about found its way down here.
339 */
340 blk_run_backing_dev(bdi, NULL);
341
342 cond_resched(); 287 cond_resched();
343 if (again) 288 if (again)
344 goto loop; 289 goto loop;
@@ -1923,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1923 1868
1924 BUG_ON(ret); 1869 BUG_ON(ret);
1925 1870
1871 trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
1872
1926 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { 1873 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1927 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); 1874 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1928 BUG_ON(ret); 1875 BUG_ON(ret);
@@ -2650,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2650 *num_bytes = chunk_bytes_by_type(type, calc_size, 2597 *num_bytes = chunk_bytes_by_type(type, calc_size,
2651 map->num_stripes, sub_stripes); 2598 map->num_stripes, sub_stripes);
2652 2599
2600 trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
2601
2653 em = alloc_extent_map(GFP_NOFS); 2602 em = alloc_extent_map(GFP_NOFS);
2654 if (!em) { 2603 if (!em) {
2655 ret = -ENOMEM; 2604 ret = -ENOMEM;
@@ -2758,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
2758 item_size); 2707 item_size);
2759 BUG_ON(ret); 2708 BUG_ON(ret);
2760 } 2709 }
2710
2761 kfree(chunk); 2711 kfree(chunk);
2762 return 0; 2712 return 0;
2763} 2713}
@@ -2955,14 +2905,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
2955static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, 2905static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2956 u64 logical, u64 *length, 2906 u64 logical, u64 *length,
2957 struct btrfs_multi_bio **multi_ret, 2907 struct btrfs_multi_bio **multi_ret,
2958 int mirror_num, struct page *unplug_page) 2908 int mirror_num)
2959{ 2909{
2960 struct extent_map *em; 2910 struct extent_map *em;
2961 struct map_lookup *map; 2911 struct map_lookup *map;
2962 struct extent_map_tree *em_tree = &map_tree->map_tree; 2912 struct extent_map_tree *em_tree = &map_tree->map_tree;
2963 u64 offset; 2913 u64 offset;
2964 u64 stripe_offset; 2914 u64 stripe_offset;
2915 u64 stripe_end_offset;
2965 u64 stripe_nr; 2916 u64 stripe_nr;
2917 u64 stripe_nr_orig;
2918 u64 stripe_nr_end;
2966 int stripes_allocated = 8; 2919 int stripes_allocated = 8;
2967 int stripes_required = 1; 2920 int stripes_required = 1;
2968 int stripe_index; 2921 int stripe_index;
@@ -2971,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2971 int max_errors = 0; 2924 int max_errors = 0;
2972 struct btrfs_multi_bio *multi = NULL; 2925 struct btrfs_multi_bio *multi = NULL;
2973 2926
2974 if (multi_ret && !(rw & REQ_WRITE)) 2927 if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
2975 stripes_allocated = 1; 2928 stripes_allocated = 1;
2976again: 2929again:
2977 if (multi_ret) { 2930 if (multi_ret) {
@@ -2987,11 +2940,6 @@ again:
2987 em = lookup_extent_mapping(em_tree, logical, *length); 2940 em = lookup_extent_mapping(em_tree, logical, *length);
2988 read_unlock(&em_tree->lock); 2941 read_unlock(&em_tree->lock);
2989 2942
2990 if (!em && unplug_page) {
2991 kfree(multi);
2992 return 0;
2993 }
2994
2995 if (!em) { 2943 if (!em) {
2996 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2944 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
2997 (unsigned long long)logical, 2945 (unsigned long long)logical,
@@ -3017,7 +2965,15 @@ again:
3017 max_errors = 1; 2965 max_errors = 1;
3018 } 2966 }
3019 } 2967 }
3020 if (multi_ret && (rw & REQ_WRITE) && 2968 if (rw & REQ_DISCARD) {
2969 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
2970 BTRFS_BLOCK_GROUP_RAID1 |
2971 BTRFS_BLOCK_GROUP_DUP |
2972 BTRFS_BLOCK_GROUP_RAID10)) {
2973 stripes_required = map->num_stripes;
2974 }
2975 }
2976 if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
3021 stripes_allocated < stripes_required) { 2977 stripes_allocated < stripes_required) {
3022 stripes_allocated = map->num_stripes; 2978 stripes_allocated = map->num_stripes;
3023 free_extent_map(em); 2979 free_extent_map(em);
@@ -3037,23 +2993,37 @@ again:
3037 /* stripe_offset is the offset of this block in its stripe*/ 2993 /* stripe_offset is the offset of this block in its stripe*/
3038 stripe_offset = offset - stripe_offset; 2994 stripe_offset = offset - stripe_offset;
3039 2995
3040 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 2996 if (rw & REQ_DISCARD)
3041 BTRFS_BLOCK_GROUP_RAID10 | 2997 *length = min_t(u64, em->len - offset, *length);
3042 BTRFS_BLOCK_GROUP_DUP)) { 2998 else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
2999 BTRFS_BLOCK_GROUP_RAID1 |
3000 BTRFS_BLOCK_GROUP_RAID10 |
3001 BTRFS_BLOCK_GROUP_DUP)) {
3043 /* we limit the length of each bio to what fits in a stripe */ 3002 /* we limit the length of each bio to what fits in a stripe */
3044 *length = min_t(u64, em->len - offset, 3003 *length = min_t(u64, em->len - offset,
3045 map->stripe_len - stripe_offset); 3004 map->stripe_len - stripe_offset);
3046 } else { 3005 } else {
3047 *length = em->len - offset; 3006 *length = em->len - offset;
3048 } 3007 }
3049 3008
3050 if (!multi_ret && !unplug_page) 3009 if (!multi_ret)
3051 goto out; 3010 goto out;
3052 3011
3053 num_stripes = 1; 3012 num_stripes = 1;
3054 stripe_index = 0; 3013 stripe_index = 0;
3055 if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 3014 stripe_nr_orig = stripe_nr;
3056 if (unplug_page || (rw & REQ_WRITE)) 3015 stripe_nr_end = (offset + *length + map->stripe_len - 1) &
3016 (~(map->stripe_len - 1));
3017 do_div(stripe_nr_end, map->stripe_len);
3018 stripe_end_offset = stripe_nr_end * map->stripe_len -
3019 (offset + *length);
3020 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3021 if (rw & REQ_DISCARD)
3022 num_stripes = min_t(u64, map->num_stripes,
3023 stripe_nr_end - stripe_nr_orig);
3024 stripe_index = do_div(stripe_nr, map->num_stripes);
3025 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3026 if (rw & (REQ_WRITE | REQ_DISCARD))
3057 num_stripes = map->num_stripes; 3027 num_stripes = map->num_stripes;
3058 else if (mirror_num) 3028 else if (mirror_num)
3059 stripe_index = mirror_num - 1; 3029 stripe_index = mirror_num - 1;
@@ -3064,7 +3034,7 @@ again:
3064 } 3034 }
3065 3035
3066 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 3036 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3067 if (rw & REQ_WRITE) 3037 if (rw & (REQ_WRITE | REQ_DISCARD))
3068 num_stripes = map->num_stripes; 3038 num_stripes = map->num_stripes;
3069 else if (mirror_num) 3039 else if (mirror_num)
3070 stripe_index = mirror_num - 1; 3040 stripe_index = mirror_num - 1;
@@ -3075,8 +3045,12 @@ again:
3075 stripe_index = do_div(stripe_nr, factor); 3045 stripe_index = do_div(stripe_nr, factor);
3076 stripe_index *= map->sub_stripes; 3046 stripe_index *= map->sub_stripes;
3077 3047
3078 if (unplug_page || (rw & REQ_WRITE)) 3048 if (rw & REQ_WRITE)
3079 num_stripes = map->sub_stripes; 3049 num_stripes = map->sub_stripes;
3050 else if (rw & REQ_DISCARD)
3051 num_stripes = min_t(u64, map->sub_stripes *
3052 (stripe_nr_end - stripe_nr_orig),
3053 map->num_stripes);
3080 else if (mirror_num) 3054 else if (mirror_num)
3081 stripe_index += mirror_num - 1; 3055 stripe_index += mirror_num - 1;
3082 else { 3056 else {
@@ -3094,24 +3068,101 @@ again:
3094 } 3068 }
3095 BUG_ON(stripe_index >= map->num_stripes); 3069 BUG_ON(stripe_index >= map->num_stripes);
3096 3070
3097 for (i = 0; i < num_stripes; i++) { 3071 if (rw & REQ_DISCARD) {
3098 if (unplug_page) { 3072 for (i = 0; i < num_stripes; i++) {
3099 struct btrfs_device *device;
3100 struct backing_dev_info *bdi;
3101
3102 device = map->stripes[stripe_index].dev;
3103 if (device->bdev) {
3104 bdi = blk_get_backing_dev_info(device->bdev);
3105 if (bdi->unplug_io_fn)
3106 bdi->unplug_io_fn(bdi, unplug_page);
3107 }
3108 } else {
3109 multi->stripes[i].physical = 3073 multi->stripes[i].physical =
3110 map->stripes[stripe_index].physical + 3074 map->stripes[stripe_index].physical +
3111 stripe_offset + stripe_nr * map->stripe_len; 3075 stripe_offset + stripe_nr * map->stripe_len;
3112 multi->stripes[i].dev = map->stripes[stripe_index].dev; 3076 multi->stripes[i].dev = map->stripes[stripe_index].dev;
3077
3078 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3079 u64 stripes;
3080 u32 last_stripe = 0;
3081 int j;
3082
3083 div_u64_rem(stripe_nr_end - 1,
3084 map->num_stripes,
3085 &last_stripe);
3086
3087 for (j = 0; j < map->num_stripes; j++) {
3088 u32 test;
3089
3090 div_u64_rem(stripe_nr_end - 1 - j,
3091 map->num_stripes, &test);
3092 if (test == stripe_index)
3093 break;
3094 }
3095 stripes = stripe_nr_end - 1 - j;
3096 do_div(stripes, map->num_stripes);
3097 multi->stripes[i].length = map->stripe_len *
3098 (stripes - stripe_nr + 1);
3099
3100 if (i == 0) {
3101 multi->stripes[i].length -=
3102 stripe_offset;
3103 stripe_offset = 0;
3104 }
3105 if (stripe_index == last_stripe)
3106 multi->stripes[i].length -=
3107 stripe_end_offset;
3108 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3109 u64 stripes;
3110 int j;
3111 int factor = map->num_stripes /
3112 map->sub_stripes;
3113 u32 last_stripe = 0;
3114
3115 div_u64_rem(stripe_nr_end - 1,
3116 factor, &last_stripe);
3117 last_stripe *= map->sub_stripes;
3118
3119 for (j = 0; j < factor; j++) {
3120 u32 test;
3121
3122 div_u64_rem(stripe_nr_end - 1 - j,
3123 factor, &test);
3124
3125 if (test ==
3126 stripe_index / map->sub_stripes)
3127 break;
3128 }
3129 stripes = stripe_nr_end - 1 - j;
3130 do_div(stripes, factor);
3131 multi->stripes[i].length = map->stripe_len *
3132 (stripes - stripe_nr + 1);
3133
3134 if (i < map->sub_stripes) {
3135 multi->stripes[i].length -=
3136 stripe_offset;
3137 if (i == map->sub_stripes - 1)
3138 stripe_offset = 0;
3139 }
3140 if (stripe_index >= last_stripe &&
3141 stripe_index <= (last_stripe +
3142 map->sub_stripes - 1)) {
3143 multi->stripes[i].length -=
3144 stripe_end_offset;
3145 }
3146 } else
3147 multi->stripes[i].length = *length;
3148
3149 stripe_index++;
3150 if (stripe_index == map->num_stripes) {
3151 /* This could only happen for RAID0/10 */
3152 stripe_index = 0;
3153 stripe_nr++;
3154 }
3155 }
3156 } else {
3157 for (i = 0; i < num_stripes; i++) {
3158 multi->stripes[i].physical =
3159 map->stripes[stripe_index].physical +
3160 stripe_offset +
3161 stripe_nr * map->stripe_len;
3162 multi->stripes[i].dev =
3163 map->stripes[stripe_index].dev;
3164 stripe_index++;
3113 } 3165 }
3114 stripe_index++;
3115 } 3166 }
3116 if (multi_ret) { 3167 if (multi_ret) {
3117 *multi_ret = multi; 3168 *multi_ret = multi;
@@ -3128,7 +3179,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3128 struct btrfs_multi_bio **multi_ret, int mirror_num) 3179 struct btrfs_multi_bio **multi_ret, int mirror_num)
3129{ 3180{
3130 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, 3181 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
3131 mirror_num, NULL); 3182 mirror_num);
3132} 3183}
3133 3184
3134int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 3185int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3247,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
3196 return 0; 3247 return 0;
3197} 3248}
3198 3249
3199int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
3200 u64 logical, struct page *page)
3201{
3202 u64 length = PAGE_CACHE_SIZE;
3203 return __btrfs_map_block(map_tree, READ, logical, &length,
3204 NULL, 0, page);
3205}
3206
3207static void end_bio_multi_stripe(struct bio *bio, int err) 3250static void end_bio_multi_stripe(struct bio *bio, int err)
3208{ 3251{
3209 struct btrfs_multi_bio *multi = bio->bi_private; 3252 struct btrfs_multi_bio *multi = bio->bi_private;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7fb59d45fe8c..cc2eadaf7a27 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -126,6 +126,7 @@ struct btrfs_fs_devices {
126struct btrfs_bio_stripe { 126struct btrfs_bio_stripe {
127 struct btrfs_device *dev; 127 struct btrfs_device *dev;
128 u64 physical; 128 u64 physical;
129 u64 length; /* only used for discard mappings */
129}; 130};
130 131
131struct btrfs_multi_bio { 132struct btrfs_multi_bio {
@@ -145,6 +146,17 @@ struct btrfs_device_info {
145 u64 max_avail; 146 u64 max_avail;
146}; 147};
147 148
149struct map_lookup {
150 u64 type;
151 int io_align;
152 int io_width;
153 int stripe_len;
154 int sector_size;
155 int num_stripes;
156 int sub_stripes;
157 struct btrfs_bio_stripe stripes[];
158};
159
148/* Used to sort the devices by max_avail(descending sort) */ 160/* Used to sort the devices by max_avail(descending sort) */
149int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); 161int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
150 162
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index d779cefcfd7d..a5303b871b13 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -242,6 +242,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
242 break; 242 break;
243 243
244 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 244 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
245 if (verify_dir_item(root, leaf, di))
246 continue;
245 247
246 name_len = btrfs_dir_name_len(leaf, di); 248 name_len = btrfs_dir_name_len(leaf, di);
247 total_size += name_len + 1; 249 total_size += name_len + 1;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index f5ec2d44150d..faccd47c6c46 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -57,7 +57,8 @@ static struct list_head *zlib_alloc_workspace(void)
57 if (!workspace) 57 if (!workspace)
58 return ERR_PTR(-ENOMEM); 58 return ERR_PTR(-ENOMEM);
59 59
60 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 60 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize(
61 MAX_WBITS, MAX_MEM_LEVEL));
61 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 62 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
62 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 63 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
63 if (!workspace->def_strm.workspace || 64 if (!workspace->def_strm.workspace ||
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2caf..a08bb8e61c6f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
54} 54}
55EXPORT_SYMBOL(init_buffer); 55EXPORT_SYMBOL(init_buffer);
56 56
57static int sync_buffer(void *word) 57static int sleep_on_buffer(void *word)
58{ 58{
59 struct block_device *bd;
60 struct buffer_head *bh
61 = container_of(word, struct buffer_head, b_state);
62
63 smp_mb();
64 bd = bh->b_bdev;
65 if (bd)
66 blk_run_address_space(bd->bd_inode->i_mapping);
67 io_schedule(); 59 io_schedule();
68 return 0; 60 return 0;
69} 61}
70 62
71void __lock_buffer(struct buffer_head *bh) 63void __lock_buffer(struct buffer_head *bh)
72{ 64{
73 wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, 65 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
74 TASK_UNINTERRUPTIBLE); 66 TASK_UNINTERRUPTIBLE);
75} 67}
76EXPORT_SYMBOL(__lock_buffer); 68EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer);
90 */ 82 */
91void __wait_on_buffer(struct buffer_head * bh) 83void __wait_on_buffer(struct buffer_head * bh)
92{ 84{
93 wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); 85 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
94} 86}
95EXPORT_SYMBOL(__wait_on_buffer); 87EXPORT_SYMBOL(__wait_on_buffer);
96 88
@@ -749,10 +741,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
749{ 741{
750 struct buffer_head *bh; 742 struct buffer_head *bh;
751 struct list_head tmp; 743 struct list_head tmp;
752 struct address_space *mapping, *prev_mapping = NULL; 744 struct address_space *mapping;
753 int err = 0, err2; 745 int err = 0, err2;
746 struct blk_plug plug;
754 747
755 INIT_LIST_HEAD(&tmp); 748 INIT_LIST_HEAD(&tmp);
749 blk_start_plug(&plug);
756 750
757 spin_lock(lock); 751 spin_lock(lock);
758 while (!list_empty(list)) { 752 while (!list_empty(list)) {
@@ -775,7 +769,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
775 * still in flight on potentially older 769 * still in flight on potentially older
776 * contents. 770 * contents.
777 */ 771 */
778 write_dirty_buffer(bh, WRITE_SYNC_PLUG); 772 write_dirty_buffer(bh, WRITE_SYNC);
779 773
780 /* 774 /*
781 * Kick off IO for the previous mapping. Note 775 * Kick off IO for the previous mapping. Note
@@ -783,16 +777,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
783 * wait_on_buffer() will do that for us 777 * wait_on_buffer() will do that for us
784 * through sync_buffer(). 778 * through sync_buffer().
785 */ 779 */
786 if (prev_mapping && prev_mapping != mapping)
787 blk_run_address_space(prev_mapping);
788 prev_mapping = mapping;
789
790 brelse(bh); 780 brelse(bh);
791 spin_lock(lock); 781 spin_lock(lock);
792 } 782 }
793 } 783 }
794 } 784 }
795 785
786 spin_unlock(lock);
787 blk_finish_plug(&plug);
788 spin_lock(lock);
789
796 while (!list_empty(&tmp)) { 790 while (!list_empty(&tmp)) {
797 bh = BH_ENTRY(tmp.prev); 791 bh = BH_ENTRY(tmp.prev);
798 get_bh(bh); 792 get_bh(bh);
@@ -1144,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1144 * inode list. 1138 * inode list.
1145 * 1139 *
1146 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, 1140 * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
1147 * mapping->tree_lock and the global inode_lock. 1141 * mapping->tree_lock and mapping->host->i_lock.
1148 */ 1142 */
1149void mark_buffer_dirty(struct buffer_head *bh) 1143void mark_buffer_dirty(struct buffer_head *bh)
1150{ 1144{
@@ -1614,14 +1608,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
1614 * prevents this contention from occurring. 1608 * prevents this contention from occurring.
1615 * 1609 *
1616 * If block_write_full_page() is called with wbc->sync_mode == 1610 * If block_write_full_page() is called with wbc->sync_mode ==
1617 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this 1611 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
1618 * causes the writes to be flagged as synchronous writes, but the 1612 * causes the writes to be flagged as synchronous writes.
1619 * block device queue will NOT be unplugged, since usually many pages
1620 * will be pushed to the out before the higher-level caller actually
1621 * waits for the writes to be completed. The various wait functions,
1622 * such as wait_on_writeback_range() will ultimately call sync_page()
1623 * which will ultimately call blk_run_backing_dev(), which will end up
1624 * unplugging the device queue.
1625 */ 1613 */
1626static int __block_write_full_page(struct inode *inode, struct page *page, 1614static int __block_write_full_page(struct inode *inode, struct page *page,
1627 get_block_t *get_block, struct writeback_control *wbc, 1615 get_block_t *get_block, struct writeback_control *wbc,
@@ -1634,7 +1622,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1634 const unsigned blocksize = 1 << inode->i_blkbits; 1622 const unsigned blocksize = 1 << inode->i_blkbits;
1635 int nr_underway = 0; 1623 int nr_underway = 0;
1636 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? 1624 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1637 WRITE_SYNC_PLUG : WRITE); 1625 WRITE_SYNC : WRITE);
1638 1626
1639 BUG_ON(!PageLocked(page)); 1627 BUG_ON(!PageLocked(page));
1640 1628
@@ -3138,17 +3126,6 @@ out:
3138} 3126}
3139EXPORT_SYMBOL(try_to_free_buffers); 3127EXPORT_SYMBOL(try_to_free_buffers);
3140 3128
3141void block_sync_page(struct page *page)
3142{
3143 struct address_space *mapping;
3144
3145 smp_mb();
3146 mapping = page_mapping(page);
3147 if (mapping)
3148 blk_run_backing_dev(mapping->backing_dev_info, page);
3149}
3150EXPORT_SYMBOL(block_sync_page);
3151
3152/* 3129/*
3153 * There are no bdflush tunables left. But distributions are 3130 * There are no bdflush tunables left. But distributions are
3154 * still running obsolete flush daemons, so we terminate them here. 3131 * still running obsolete flush daemons, so we terminate them here.
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 561438b6a50c..37368ba2e67c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -92,7 +92,7 @@ static int ceph_set_page_dirty(struct page *page)
92 ci->i_head_snapc = ceph_get_snap_context(snapc); 92 ci->i_head_snapc = ceph_get_snap_context(snapc);
93 ++ci->i_wrbuffer_ref_head; 93 ++ci->i_wrbuffer_ref_head;
94 if (ci->i_wrbuffer_ref == 0) 94 if (ci->i_wrbuffer_ref == 0)
95 igrab(inode); 95 ihold(inode);
96 ++ci->i_wrbuffer_ref; 96 ++ci->i_wrbuffer_ref;
97 dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d " 97 dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
98 "snapc %p seq %lld (%d snaps)\n", 98 "snapc %p seq %lld (%d snaps)\n",
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 08f65faac112..0dba6915712b 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -210,8 +210,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
210 if (!fsc->debugfs_congestion_kb) 210 if (!fsc->debugfs_congestion_kb)
211 goto out; 211 goto out;
212 212
213 dout("a\n");
214
215 snprintf(name, sizeof(name), "../../bdi/%s", 213 snprintf(name, sizeof(name), "../../bdi/%s",
216 dev_name(fsc->backing_dev_info.dev)); 214 dev_name(fsc->backing_dev_info.dev));
217 fsc->debugfs_bdi = 215 fsc->debugfs_bdi =
@@ -221,7 +219,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
221 if (!fsc->debugfs_bdi) 219 if (!fsc->debugfs_bdi)
222 goto out; 220 goto out;
223 221
224 dout("b\n");
225 fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", 222 fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
226 0600, 223 0600,
227 fsc->client->debugfs_dir, 224 fsc->client->debugfs_dir,
@@ -230,7 +227,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
230 if (!fsc->debugfs_mdsmap) 227 if (!fsc->debugfs_mdsmap)
231 goto out; 228 goto out;
232 229
233 dout("ca\n");
234 fsc->debugfs_mdsc = debugfs_create_file("mdsc", 230 fsc->debugfs_mdsc = debugfs_create_file("mdsc",
235 0600, 231 0600,
236 fsc->client->debugfs_dir, 232 fsc->client->debugfs_dir,
@@ -239,7 +235,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
239 if (!fsc->debugfs_mdsc) 235 if (!fsc->debugfs_mdsc)
240 goto out; 236 goto out;
241 237
242 dout("da\n");
243 fsc->debugfs_caps = debugfs_create_file("caps", 238 fsc->debugfs_caps = debugfs_create_file("caps",
244 0400, 239 0400,
245 fsc->client->debugfs_dir, 240 fsc->client->debugfs_dir,
@@ -248,7 +243,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
248 if (!fsc->debugfs_caps) 243 if (!fsc->debugfs_caps)
249 goto out; 244 goto out;
250 245
251 dout("ea\n");
252 fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", 246 fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
253 0600, 247 0600,
254 fsc->client->debugfs_dir, 248 fsc->client->debugfs_dir,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ebafa65a29b6..1a867a3601ae 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -161,7 +161,7 @@ more:
161 filp->f_pos = di->offset; 161 filp->f_pos = di->offset;
162 err = filldir(dirent, dentry->d_name.name, 162 err = filldir(dirent, dentry->d_name.name,
163 dentry->d_name.len, di->offset, 163 dentry->d_name.len, di->offset,
164 dentry->d_inode->i_ino, 164 ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
165 dentry->d_inode->i_mode >> 12); 165 dentry->d_inode->i_mode >> 12);
166 166
167 if (last) { 167 if (last) {
@@ -245,15 +245,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
245 245
246 dout("readdir off 0 -> '.'\n"); 246 dout("readdir off 0 -> '.'\n");
247 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), 247 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
248 inode->i_ino, inode->i_mode >> 12) < 0) 248 ceph_translate_ino(inode->i_sb, inode->i_ino),
249 inode->i_mode >> 12) < 0)
249 return 0; 250 return 0;
250 filp->f_pos = 1; 251 filp->f_pos = 1;
251 off = 1; 252 off = 1;
252 } 253 }
253 if (filp->f_pos == 1) { 254 if (filp->f_pos == 1) {
255 ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
254 dout("readdir off 1 -> '..'\n"); 256 dout("readdir off 1 -> '..'\n");
255 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), 257 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
256 filp->f_dentry->d_parent->d_inode->i_ino, 258 ceph_translate_ino(inode->i_sb, ino),
257 inode->i_mode >> 12) < 0) 259 inode->i_mode >> 12) < 0)
258 return 0; 260 return 0;
259 filp->f_pos = 2; 261 filp->f_pos = 2;
@@ -377,7 +379,8 @@ more:
377 if (filldir(dirent, 379 if (filldir(dirent,
378 rinfo->dir_dname[off - fi->offset], 380 rinfo->dir_dname[off - fi->offset],
379 rinfo->dir_dname_len[off - fi->offset], 381 rinfo->dir_dname_len[off - fi->offset],
380 pos, ino, ftype) < 0) { 382 pos,
383 ceph_translate_ino(inode->i_sb, ino), ftype) < 0) {
381 dout("filldir stopping us...\n"); 384 dout("filldir stopping us...\n");
382 return 0; 385 return 0;
383 } 386 }
@@ -1024,14 +1027,13 @@ out_touch:
1024} 1027}
1025 1028
1026/* 1029/*
1027 * When a dentry is released, clear the dir I_COMPLETE if it was part 1030 * Release our ceph_dentry_info.
1028 * of the current dir gen or if this is in the snapshot namespace.
1029 */ 1031 */
1030static void ceph_dentry_release(struct dentry *dentry) 1032static void ceph_d_release(struct dentry *dentry)
1031{ 1033{
1032 struct ceph_dentry_info *di = ceph_dentry(dentry); 1034 struct ceph_dentry_info *di = ceph_dentry(dentry);
1033 1035
1034 dout("dentry_release %p\n", dentry); 1036 dout("d_release %p\n", dentry);
1035 if (di) { 1037 if (di) {
1036 ceph_dentry_lru_del(dentry); 1038 ceph_dentry_lru_del(dentry);
1037 if (di->lease_session) 1039 if (di->lease_session)
@@ -1256,14 +1258,14 @@ const struct inode_operations ceph_dir_iops = {
1256 1258
1257const struct dentry_operations ceph_dentry_ops = { 1259const struct dentry_operations ceph_dentry_ops = {
1258 .d_revalidate = ceph_d_revalidate, 1260 .d_revalidate = ceph_d_revalidate,
1259 .d_release = ceph_dentry_release, 1261 .d_release = ceph_d_release,
1260}; 1262};
1261 1263
1262const struct dentry_operations ceph_snapdir_dentry_ops = { 1264const struct dentry_operations ceph_snapdir_dentry_ops = {
1263 .d_revalidate = ceph_snapdir_d_revalidate, 1265 .d_revalidate = ceph_snapdir_d_revalidate,
1264 .d_release = ceph_dentry_release, 1266 .d_release = ceph_d_release,
1265}; 1267};
1266 1268
1267const struct dentry_operations ceph_snap_dentry_ops = { 1269const struct dentry_operations ceph_snap_dentry_ops = {
1268 .d_release = ceph_dentry_release, 1270 .d_release = ceph_d_release,
1269}; 1271};
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7d0e4a82d898..159b512d5a27 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -564,11 +564,19 @@ more:
564 * start_request so that a tid has been assigned. 564 * start_request so that a tid has been assigned.
565 */ 565 */
566 spin_lock(&ci->i_unsafe_lock); 566 spin_lock(&ci->i_unsafe_lock);
567 list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); 567 list_add_tail(&req->r_unsafe_item,
568 &ci->i_unsafe_writes);
568 spin_unlock(&ci->i_unsafe_lock); 569 spin_unlock(&ci->i_unsafe_lock);
569 ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); 570 ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
570 } 571 }
572
571 ret = ceph_osdc_wait_request(&fsc->client->osdc, req); 573 ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
574 if (ret < 0 && req->r_safe_callback) {
575 spin_lock(&ci->i_unsafe_lock);
576 list_del_init(&req->r_unsafe_item);
577 spin_unlock(&ci->i_unsafe_lock);
578 ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
579 }
572 } 580 }
573 581
574 if (file->f_flags & O_DIRECT) 582 if (file->f_flags & O_DIRECT)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 193bfa5e9cbd..b54c97da1c43 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -36,6 +36,13 @@ static void ceph_vmtruncate_work(struct work_struct *work);
36/* 36/*
37 * find or create an inode, given the ceph ino number 37 * find or create an inode, given the ceph ino number
38 */ 38 */
39static int ceph_set_ino_cb(struct inode *inode, void *data)
40{
41 ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
42 inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
43 return 0;
44}
45
39struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) 46struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
40{ 47{
41 struct inode *inode; 48 struct inode *inode;
@@ -1030,9 +1037,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1030 dout("fill_trace doing d_move %p -> %p\n", 1037 dout("fill_trace doing d_move %p -> %p\n",
1031 req->r_old_dentry, dn); 1038 req->r_old_dentry, dn);
1032 1039
1033 /* d_move screws up d_subdirs order */
1034 ceph_i_clear(dir, CEPH_I_COMPLETE);
1035
1036 d_move(req->r_old_dentry, dn); 1040 d_move(req->r_old_dentry, dn);
1037 dout(" src %p '%.*s' dst %p '%.*s'\n", 1041 dout(" src %p '%.*s' dst %p '%.*s'\n",
1038 req->r_old_dentry, 1042 req->r_old_dentry,
@@ -1044,12 +1048,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1044 rehashing bug in vfs_rename_dir */ 1048 rehashing bug in vfs_rename_dir */
1045 ceph_invalidate_dentry_lease(dn); 1049 ceph_invalidate_dentry_lease(dn);
1046 1050
1047 /* take overwritten dentry's readdir offset */ 1051 /*
1048 dout("dn %p gets %p offset %lld (old offset %lld)\n", 1052 * d_move() puts the renamed dentry at the end of
1049 req->r_old_dentry, dn, ceph_dentry(dn)->offset, 1053 * d_subdirs. We need to assign it an appropriate
1054 * directory offset so we can behave when holding
1055 * I_COMPLETE.
1056 */
1057 ceph_set_dentry_offset(req->r_old_dentry);
1058 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
1050 ceph_dentry(req->r_old_dentry)->offset); 1059 ceph_dentry(req->r_old_dentry)->offset);
1051 ceph_dentry(req->r_old_dentry)->offset =
1052 ceph_dentry(dn)->offset;
1053 1060
1054 dn = req->r_old_dentry; /* use old_dentry */ 1061 dn = req->r_old_dentry; /* use old_dentry */
1055 in = dn->d_inode; 1062 in = dn->d_inode;
@@ -1809,7 +1816,7 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
1809 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); 1816 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
1810 if (!err) { 1817 if (!err) {
1811 generic_fillattr(inode, stat); 1818 generic_fillattr(inode, stat);
1812 stat->ino = inode->i_ino; 1819 stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
1813 if (ceph_snap(inode) != CEPH_NOSNAP) 1820 if (ceph_snap(inode) != CEPH_NOSNAP)
1814 stat->dev = ceph_snap(inode); 1821 stat->dev = ceph_snap(inode);
1815 else 1822 else
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f40b9139e437..0aee66b92af3 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -463,8 +463,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
463 463
464 dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, 464 dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
465 capsnap, snapc); 465 capsnap, snapc);
466 igrab(inode); 466 ihold(inode);
467 467
468 atomic_set(&capsnap->nref, 1); 468 atomic_set(&capsnap->nref, 1);
469 capsnap->ci = ci; 469 capsnap->ci = ci;
470 INIT_LIST_HEAD(&capsnap->ci_item); 470 INIT_LIST_HEAD(&capsnap->ci_item);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9c5085465a63..a9e78b4a258c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -131,6 +131,7 @@ enum {
131 Opt_rbytes, 131 Opt_rbytes,
132 Opt_norbytes, 132 Opt_norbytes,
133 Opt_noasyncreaddir, 133 Opt_noasyncreaddir,
134 Opt_ino32,
134}; 135};
135 136
136static match_table_t fsopt_tokens = { 137static match_table_t fsopt_tokens = {
@@ -150,6 +151,7 @@ static match_table_t fsopt_tokens = {
150 {Opt_rbytes, "rbytes"}, 151 {Opt_rbytes, "rbytes"},
151 {Opt_norbytes, "norbytes"}, 152 {Opt_norbytes, "norbytes"},
152 {Opt_noasyncreaddir, "noasyncreaddir"}, 153 {Opt_noasyncreaddir, "noasyncreaddir"},
154 {Opt_ino32, "ino32"},
153 {-1, NULL} 155 {-1, NULL}
154}; 156};
155 157
@@ -225,6 +227,9 @@ static int parse_fsopt_token(char *c, void *private)
225 case Opt_noasyncreaddir: 227 case Opt_noasyncreaddir:
226 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 228 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
227 break; 229 break;
230 case Opt_ino32:
231 fsopt->flags |= CEPH_MOUNT_OPT_INO32;
232 break;
228 default: 233 default:
229 BUG_ON(token); 234 BUG_ON(token);
230 } 235 }
@@ -288,7 +293,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
288 fsopt->sb_flags = flags; 293 fsopt->sb_flags = flags;
289 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 294 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
290 295
291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 296 fsopt->rsize = CEPH_RSIZE_DEFAULT;
292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 297 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
293 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 298 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
294 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 299 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
@@ -370,7 +375,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
370 375
371 if (fsopt->wsize) 376 if (fsopt->wsize)
372 seq_printf(m, ",wsize=%d", fsopt->wsize); 377 seq_printf(m, ",wsize=%d", fsopt->wsize);
373 if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) 378 if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
374 seq_printf(m, ",rsize=%d", fsopt->rsize); 379 seq_printf(m, ",rsize=%d", fsopt->rsize);
375 if (fsopt->congestion_kb != default_congestion_kb()) 380 if (fsopt->congestion_kb != default_congestion_kb())
376 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 381 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 20b907d76ae2..619fe719968f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -27,6 +27,7 @@
27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ 27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
28#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ 28#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
29#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ 29#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
30#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
30 31
31#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) 32#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
32 33
@@ -35,6 +36,7 @@
35#define ceph_test_mount_opt(fsc, opt) \ 36#define ceph_test_mount_opt(fsc, opt) \
36 (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) 37 (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
37 38
39#define CEPH_RSIZE_DEFAULT (512*1024) /* readahead */
38#define CEPH_MAX_READDIR_DEFAULT 1024 40#define CEPH_MAX_READDIR_DEFAULT 1024
39#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) 41#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
40#define CEPH_SNAPDIRNAME_DEFAULT ".snap" 42#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -319,6 +321,16 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
319 return container_of(inode, struct ceph_inode_info, vfs_inode); 321 return container_of(inode, struct ceph_inode_info, vfs_inode);
320} 322}
321 323
324static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
325{
326 return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
327}
328
329static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
330{
331 return (struct ceph_fs_client *)sb->s_fs_info;
332}
333
322static inline struct ceph_vino ceph_vino(struct inode *inode) 334static inline struct ceph_vino ceph_vino(struct inode *inode)
323{ 335{
324 return ceph_inode(inode)->i_vino; 336 return ceph_inode(inode)->i_vino;
@@ -327,19 +339,49 @@ static inline struct ceph_vino ceph_vino(struct inode *inode)
327/* 339/*
328 * ino_t is <64 bits on many architectures, blech. 340 * ino_t is <64 bits on many architectures, blech.
329 * 341 *
330 * don't include snap in ino hash, at least for now. 342 * i_ino (kernel inode) st_ino (userspace)
343 * i386 32 32
344 * x86_64+ino32 64 32
345 * x86_64 64 64
346 */
347static inline u32 ceph_ino_to_ino32(ino_t ino)
348{
349 ino ^= ino >> (sizeof(ino) * 8 - 32);
350 if (!ino)
351 ino = 1;
352 return ino;
353}
354
355/*
356 * kernel i_ino value
331 */ 357 */
332static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) 358static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
333{ 359{
334 ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ 360 ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */
335#if BITS_PER_LONG == 32 361#if BITS_PER_LONG == 32
336 ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; 362 ino = ceph_ino_to_ino32(ino);
337 if (!ino)
338 ino = 1;
339#endif 363#endif
340 return ino; 364 return ino;
341} 365}
342 366
367/*
368 * user-visible ino (stat, filldir)
369 */
370#if BITS_PER_LONG == 32
371static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
372{
373 return ino;
374}
375#else
376static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
377{
378 if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
379 ino = ceph_ino_to_ino32(ino);
380 return ino;
381}
382#endif
383
384
343/* for printf-style formatting */ 385/* for printf-style formatting */
344#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap 386#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
345 387
@@ -428,13 +470,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
428 return ((loff_t)frag << 32) | (loff_t)off; 470 return ((loff_t)frag << 32) | (loff_t)off;
429} 471}
430 472
431static inline int ceph_set_ino_cb(struct inode *inode, void *data)
432{
433 ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
434 inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
435 return 0;
436}
437
438/* 473/*
439 * caps helpers 474 * caps helpers
440 */ 475 */
@@ -503,15 +538,6 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
503 int *total, int *avail, int *used, 538 int *total, int *avail, int *used,
504 int *reserved, int *min); 539 int *reserved, int *min);
505 540
506static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
507{
508 return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
509}
510
511static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
512{
513 return (struct ceph_fs_client *)sb->s_fs_info;
514}
515 541
516 542
517/* 543/*
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e964b1cd5dd0..c27d236738fc 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1569,34 +1569,6 @@ int cifs_fsync(struct file *file, int datasync)
1569 return rc; 1569 return rc;
1570} 1570}
1571 1571
1572/* static void cifs_sync_page(struct page *page)
1573{
1574 struct address_space *mapping;
1575 struct inode *inode;
1576 unsigned long index = page->index;
1577 unsigned int rpages = 0;
1578 int rc = 0;
1579
1580 cFYI(1, "sync page %p", page);
1581 mapping = page->mapping;
1582 if (!mapping)
1583 return 0;
1584 inode = mapping->host;
1585 if (!inode)
1586 return; */
1587
1588/* fill in rpages then
1589 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1590
1591/* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1592
1593#if 0
1594 if (rc < 0)
1595 return rc;
1596 return 0;
1597#endif
1598} */
1599
1600/* 1572/*
1601 * As file closes, flush all cached write data for this inode checking 1573 * As file closes, flush all cached write data for this inode checking
1602 * for write behind errors. 1574 * for write behind errors.
@@ -2510,7 +2482,6 @@ const struct address_space_operations cifs_addr_ops = {
2510 .set_page_dirty = __set_page_dirty_nobuffers, 2482 .set_page_dirty = __set_page_dirty_nobuffers,
2511 .releasepage = cifs_release_page, 2483 .releasepage = cifs_release_page,
2512 .invalidatepage = cifs_invalidate_page, 2484 .invalidatepage = cifs_invalidate_page,
2513 /* .sync_page = cifs_sync_page, */
2514 /* .direct_IO = */ 2485 /* .direct_IO = */
2515}; 2486};
2516 2487
@@ -2528,6 +2499,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
2528 .set_page_dirty = __set_page_dirty_nobuffers, 2499 .set_page_dirty = __set_page_dirty_nobuffers,
2529 .releasepage = cifs_release_page, 2500 .releasepage = cifs_release_page,
2530 .invalidatepage = cifs_invalidate_page, 2501 .invalidatepage = cifs_invalidate_page,
2531 /* .sync_page = cifs_sync_page, */
2532 /* .direct_IO = */ 2502 /* .direct_IO = */
2533}; 2503};
diff --git a/fs/coda/Makefile b/fs/coda/Makefile
index 6c22e61da397..1bab69a0d347 100644
--- a/fs/coda/Makefile
+++ b/fs/coda/Makefile
@@ -9,4 +9,4 @@ coda-objs := psdev.o cache.o cnode.o inode.o dir.o file.o upcall.o \
9 9
10# If you want debugging output, please uncomment the following line. 10# If you want debugging output, please uncomment the following line.
11 11
12# EXTRA_CFLAGS += -DDEBUG -DDEBUG_SMB_MALLOC=1 12# ccflags-y := -DDEBUG -DDEBUG_SMB_MALLOC=1
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index c6405ce3c50e..af56ad56a89a 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -13,7 +13,6 @@
13 13
14#ifdef CONFIG_SYSCTL 14#ifdef CONFIG_SYSCTL
15static struct ctl_table_header *fs_table_header; 15static struct ctl_table_header *fs_table_header;
16#endif
17 16
18static ctl_table coda_table[] = { 17static ctl_table coda_table[] = {
19 { 18 {
@@ -40,7 +39,6 @@ static ctl_table coda_table[] = {
40 {} 39 {}
41}; 40};
42 41
43#ifdef CONFIG_SYSCTL
44static ctl_table fs_table[] = { 42static ctl_table fs_table[] = {
45 { 43 {
46 .procname = "coda", 44 .procname = "coda",
@@ -49,22 +47,27 @@ static ctl_table fs_table[] = {
49 }, 47 },
50 {} 48 {}
51}; 49};
52#endif
53 50
54void coda_sysctl_init(void) 51void coda_sysctl_init(void)
55{ 52{
56#ifdef CONFIG_SYSCTL
57 if ( !fs_table_header ) 53 if ( !fs_table_header )
58 fs_table_header = register_sysctl_table(fs_table); 54 fs_table_header = register_sysctl_table(fs_table);
59#endif
60} 55}
61 56
62void coda_sysctl_clean(void) 57void coda_sysctl_clean(void)
63{ 58{
64#ifdef CONFIG_SYSCTL
65 if ( fs_table_header ) { 59 if ( fs_table_header ) {
66 unregister_sysctl_table(fs_table_header); 60 unregister_sysctl_table(fs_table_header);
67 fs_table_header = NULL; 61 fs_table_header = NULL;
68 } 62 }
69#endif
70} 63}
64
65#else
66void coda_sysctl_init(void)
67{
68}
69
70void coda_sysctl_clean(void)
71{
72}
73#endif
diff --git a/fs/compat.c b/fs/compat.c
index c6d31a3bab88..72fe6cda9108 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1671,9 +1671,6 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1671 * Update: ERESTARTSYS breaks at least the xview clock binary, so 1671 * Update: ERESTARTSYS breaks at least the xview clock binary, so
1672 * I'm trying ERESTARTNOHAND which restart only when you want to. 1672 * I'm trying ERESTARTNOHAND which restart only when you want to.
1673 */ 1673 */
1674#define MAX_SELECT_SECONDS \
1675 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1676
1677int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1674int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1678 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1675 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1679 struct timespec *end_time) 1676 struct timespec *end_time)
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1bb547c9cad6..2f27e578d466 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -479,6 +479,7 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
479 struct dentry *root = sb->s_root; 479 struct dentry *root = sb->s_root;
480 struct pts_fs_info *fsi = DEVPTS_SB(sb); 480 struct pts_fs_info *fsi = DEVPTS_SB(sb);
481 struct pts_mount_opts *opts = &fsi->mount_opts; 481 struct pts_mount_opts *opts = &fsi->mount_opts;
482 int ret = 0;
482 char s[12]; 483 char s[12];
483 484
484 /* We're supposed to be given the slave end of a pty */ 485 /* We're supposed to be given the slave end of a pty */
@@ -501,14 +502,17 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
501 mutex_lock(&root->d_inode->i_mutex); 502 mutex_lock(&root->d_inode->i_mutex);
502 503
503 dentry = d_alloc_name(root, s); 504 dentry = d_alloc_name(root, s);
504 if (!IS_ERR(dentry)) { 505 if (dentry) {
505 d_add(dentry, inode); 506 d_add(dentry, inode);
506 fsnotify_create(root->d_inode, dentry); 507 fsnotify_create(root->d_inode, dentry);
508 } else {
509 iput(inode);
510 ret = -ENOMEM;
507 } 511 }
508 512
509 mutex_unlock(&root->d_inode->i_mutex); 513 mutex_unlock(&root->d_inode->i_mutex);
510 514
511 return 0; 515 return ret;
512} 516}
513 517
514struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) 518struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
@@ -544,17 +548,12 @@ void devpts_pty_kill(struct tty_struct *tty)
544 mutex_lock(&root->d_inode->i_mutex); 548 mutex_lock(&root->d_inode->i_mutex);
545 549
546 dentry = d_find_alias(inode); 550 dentry = d_find_alias(inode);
547 if (IS_ERR(dentry))
548 goto out;
549
550 if (dentry) {
551 inode->i_nlink--;
552 d_delete(dentry);
553 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
554 }
555 551
552 inode->i_nlink--;
553 d_delete(dentry);
554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
556 dput(dentry); /* d_find_alias above */ 555 dput(dentry); /* d_find_alias above */
557out: 556
558 mutex_unlock(&root->d_inode->i_mutex); 557 mutex_unlock(&root->d_inode->i_mutex);
559} 558}
560 559
diff --git a/fs/direct-io.c b/fs/direct-io.c
index dcb5577cde1d..ac5f164170e3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1110,11 +1110,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1110 ((rw & READ) || (dio->result == dio->size))) 1110 ((rw & READ) || (dio->result == dio->size)))
1111 ret = -EIOCBQUEUED; 1111 ret = -EIOCBQUEUED;
1112 1112
1113 if (ret != -EIOCBQUEUED) { 1113 if (ret != -EIOCBQUEUED)
1114 /* All IO is now issued, send it on its way */
1115 blk_run_address_space(inode->i_mapping);
1116 dio_await_completion(dio); 1114 dio_await_completion(dio);
1117 }
1118 1115
1119 /* 1116 /*
1120 * Sync will always be dropping the final ref and completing the 1117 * Sync will always be dropping the final ref and completing the
@@ -1176,7 +1173,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1176 struct dio *dio; 1173 struct dio *dio;
1177 1174
1178 if (rw & WRITE) 1175 if (rw & WRITE)
1179 rw = WRITE_ODIRECT_PLUG; 1176 rw = WRITE_ODIRECT;
1180 1177
1181 if (bdev) 1178 if (bdev)
1182 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1179 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 2195c213ab2f..98b77c89494c 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,6 +8,7 @@
8#include <linux/writeback.h> 8#include <linux/writeback.h>
9#include <linux/sysctl.h> 9#include <linux/sysctl.h>
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include "internal.h"
11 12
12/* A global variable is a bit ugly, but it keeps the code simple */ 13/* A global variable is a bit ugly, but it keeps the code simple */
13int sysctl_drop_caches; 14int sysctl_drop_caches;
@@ -16,20 +17,23 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
16{ 17{
17 struct inode *inode, *toput_inode = NULL; 18 struct inode *inode, *toput_inode = NULL;
18 19
19 spin_lock(&inode_lock); 20 spin_lock(&inode_sb_list_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 21 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 22 spin_lock(&inode->i_lock);
22 continue; 23 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
23 if (inode->i_mapping->nrpages == 0) 24 (inode->i_mapping->nrpages == 0)) {
25 spin_unlock(&inode->i_lock);
24 continue; 26 continue;
27 }
25 __iget(inode); 28 __iget(inode);
26 spin_unlock(&inode_lock); 29 spin_unlock(&inode->i_lock);
30 spin_unlock(&inode_sb_list_lock);
27 invalidate_mapping_pages(inode->i_mapping, 0, -1); 31 invalidate_mapping_pages(inode->i_mapping, 0, -1);
28 iput(toput_inode); 32 iput(toput_inode);
29 toput_inode = inode; 33 toput_inode = inode;
30 spin_lock(&inode_lock); 34 spin_lock(&inode_sb_list_lock);
31 } 35 }
32 spin_unlock(&inode_lock); 36 spin_unlock(&inode_sb_list_lock);
33 iput(toput_inode); 37 iput(toput_inode);
34} 38}
35 39
@@ -45,7 +49,11 @@ static void drop_slab(void)
45int drop_caches_sysctl_handler(ctl_table *table, int write, 49int drop_caches_sysctl_handler(ctl_table *table, int write,
46 void __user *buffer, size_t *length, loff_t *ppos) 50 void __user *buffer, size_t *length, loff_t *ppos)
47{ 51{
48 proc_dointvec_minmax(table, write, buffer, length, ppos); 52 int ret;
53
54 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
55 if (ret)
56 return ret;
49 if (write) { 57 if (write) {
50 if (sysctl_drop_caches & 1) 58 if (sysctl_drop_caches & 1)
51 iterate_supers(drop_pagecache_sb, NULL); 59 iterate_supers(drop_pagecache_sb, NULL);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index bfd8b680e648..d2a70a4561f9 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -266,7 +266,6 @@ void ecryptfs_destroy_mount_crypt_stat(
266 &mount_crypt_stat->global_auth_tok_list, 266 &mount_crypt_stat->global_auth_tok_list,
267 mount_crypt_stat_list) { 267 mount_crypt_stat_list) {
268 list_del(&auth_tok->mount_crypt_stat_list); 268 list_del(&auth_tok->mount_crypt_stat_list);
269 mount_crypt_stat->num_global_auth_toks--;
270 if (auth_tok->global_auth_tok_key 269 if (auth_tok->global_auth_tok_key
271 && !(auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID)) 270 && !(auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID))
272 key_put(auth_tok->global_auth_tok_key); 271 key_put(auth_tok->global_auth_tok_key);
@@ -1389,6 +1388,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1389 rc = -ENOMEM; 1388 rc = -ENOMEM;
1390 goto out; 1389 goto out;
1391 } 1390 }
1391 /* Zeroed page ensures the in-header unencrypted i_size is set to 0 */
1392 rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat, 1392 rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat,
1393 ecryptfs_dentry); 1393 ecryptfs_dentry);
1394 if (unlikely(rc)) { 1394 if (unlikely(rc)) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index e00753496e3e..bd3cafd0949d 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -233,7 +233,7 @@ ecryptfs_get_key_payload_data(struct key *key)
233 233
234struct ecryptfs_key_sig { 234struct ecryptfs_key_sig {
235 struct list_head crypt_stat_list; 235 struct list_head crypt_stat_list;
236 char keysig[ECRYPTFS_SIG_SIZE_HEX]; 236 char keysig[ECRYPTFS_SIG_SIZE_HEX + 1];
237}; 237};
238 238
239struct ecryptfs_filename { 239struct ecryptfs_filename {
@@ -257,19 +257,18 @@ struct ecryptfs_filename {
257struct ecryptfs_crypt_stat { 257struct ecryptfs_crypt_stat {
258#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001 258#define ECRYPTFS_STRUCT_INITIALIZED 0x00000001
259#define ECRYPTFS_POLICY_APPLIED 0x00000002 259#define ECRYPTFS_POLICY_APPLIED 0x00000002
260#define ECRYPTFS_NEW_FILE 0x00000004 260#define ECRYPTFS_ENCRYPTED 0x00000004
261#define ECRYPTFS_ENCRYPTED 0x00000008 261#define ECRYPTFS_SECURITY_WARNING 0x00000008
262#define ECRYPTFS_SECURITY_WARNING 0x00000010 262#define ECRYPTFS_ENABLE_HMAC 0x00000010
263#define ECRYPTFS_ENABLE_HMAC 0x00000020 263#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000020
264#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040 264#define ECRYPTFS_KEY_VALID 0x00000040
265#define ECRYPTFS_KEY_VALID 0x00000080 265#define ECRYPTFS_METADATA_IN_XATTR 0x00000080
266#define ECRYPTFS_METADATA_IN_XATTR 0x00000100 266#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000100
267#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200 267#define ECRYPTFS_KEY_SET 0x00000200
268#define ECRYPTFS_KEY_SET 0x00000400 268#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000400
269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 269#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00000800
270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 270#define ECRYPTFS_ENCFN_USE_FEK 0x00001000
271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000 271#define ECRYPTFS_UNLINK_SIGS 0x00002000
272#define ECRYPTFS_UNLINK_SIGS 0x00004000
273 u32 flags; 272 u32 flags;
274 unsigned int file_version; 273 unsigned int file_version;
275 size_t iv_bytes; 274 size_t iv_bytes;
@@ -297,7 +296,6 @@ struct ecryptfs_inode_info {
297 struct inode vfs_inode; 296 struct inode vfs_inode;
298 struct inode *wii_inode; 297 struct inode *wii_inode;
299 struct file *lower_file; 298 struct file *lower_file;
300 struct mutex lower_file_mutex;
301 struct ecryptfs_crypt_stat crypt_stat; 299 struct ecryptfs_crypt_stat crypt_stat;
302}; 300};
303 301
@@ -333,7 +331,6 @@ struct ecryptfs_global_auth_tok {
333 u32 flags; 331 u32 flags;
334 struct list_head mount_crypt_stat_list; 332 struct list_head mount_crypt_stat_list;
335 struct key *global_auth_tok_key; 333 struct key *global_auth_tok_key;
336 struct ecryptfs_auth_tok *global_auth_tok;
337 unsigned char sig[ECRYPTFS_SIG_SIZE_HEX + 1]; 334 unsigned char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
338}; 335};
339 336
@@ -380,7 +377,6 @@ struct ecryptfs_mount_crypt_stat {
380 u32 flags; 377 u32 flags;
381 struct list_head global_auth_tok_list; 378 struct list_head global_auth_tok_list;
382 struct mutex global_auth_tok_list_mutex; 379 struct mutex global_auth_tok_list_mutex;
383 size_t num_global_auth_toks;
384 size_t global_default_cipher_key_size; 380 size_t global_default_cipher_key_size;
385 size_t global_default_fn_cipher_key_bytes; 381 size_t global_default_fn_cipher_key_bytes;
386 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE 382 unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 7d1050e254f9..cedc913d11ba 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -273,7 +273,14 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
273static int 273static int
274ecryptfs_fsync(struct file *file, int datasync) 274ecryptfs_fsync(struct file *file, int datasync)
275{ 275{
276 return vfs_fsync(ecryptfs_file_to_lower(file), datasync); 276 int rc = 0;
277
278 rc = generic_file_fsync(file, datasync);
279 if (rc)
280 goto out;
281 rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync);
282out:
283 return rc;
277} 284}
278 285
279static int ecryptfs_fasync(int fd, struct file *file, int flag) 286static int ecryptfs_fasync(int fd, struct file *file, int flag)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index b592938a84bc..f99051b7adab 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -143,26 +143,6 @@ out:
143} 143}
144 144
145/** 145/**
146 * grow_file
147 * @ecryptfs_dentry: the eCryptfs dentry
148 *
149 * This is the code which will grow the file to its correct size.
150 */
151static int grow_file(struct dentry *ecryptfs_dentry)
152{
153 struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
154 char zero_virt[] = { 0x00 };
155 int rc = 0;
156
157 rc = ecryptfs_write(ecryptfs_inode, zero_virt, 0, 1);
158 i_size_write(ecryptfs_inode, 0);
159 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
160 ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |=
161 ECRYPTFS_NEW_FILE;
162 return rc;
163}
164
165/**
166 * ecryptfs_initialize_file 146 * ecryptfs_initialize_file
167 * 147 *
168 * Cause the file to be changed from a basic empty file to an ecryptfs 148 * Cause the file to be changed from a basic empty file to an ecryptfs
@@ -181,7 +161,6 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
181 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 161 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
182 goto out; 162 goto out;
183 } 163 }
184 crypt_stat->flags |= ECRYPTFS_NEW_FILE;
185 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); 164 ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
186 rc = ecryptfs_new_file_context(ecryptfs_dentry); 165 rc = ecryptfs_new_file_context(ecryptfs_dentry);
187 if (rc) { 166 if (rc) {
@@ -202,9 +181,6 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
202 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 181 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
203 goto out; 182 goto out;
204 } 183 }
205 rc = grow_file(ecryptfs_dentry);
206 if (rc)
207 printk(KERN_ERR "Error growing file; rc = [%d]\n", rc);
208out: 184out:
209 return rc; 185 return rc;
210} 186}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c1436cff6f2d..03e609c45012 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -65,6 +65,24 @@ static int process_request_key_err(long err_code)
65 return rc; 65 return rc;
66} 66}
67 67
68static int process_find_global_auth_tok_for_sig_err(int err_code)
69{
70 int rc = err_code;
71
72 switch (err_code) {
73 case -ENOENT:
74 ecryptfs_printk(KERN_WARNING, "Missing auth tok\n");
75 break;
76 case -EINVAL:
77 ecryptfs_printk(KERN_WARNING, "Invalid auth tok\n");
78 break;
79 default:
80 rc = process_request_key_err(err_code);
81 break;
82 }
83 return rc;
84}
85
68/** 86/**
69 * ecryptfs_parse_packet_length 87 * ecryptfs_parse_packet_length
70 * @data: Pointer to memory containing length at offset 88 * @data: Pointer to memory containing length at offset
@@ -403,27 +421,120 @@ out:
403 return rc; 421 return rc;
404} 422}
405 423
424/**
425 * ecryptfs_verify_version
426 * @version: The version number to confirm
427 *
428 * Returns zero on good version; non-zero otherwise
429 */
430static int ecryptfs_verify_version(u16 version)
431{
432 int rc = 0;
433 unsigned char major;
434 unsigned char minor;
435
436 major = ((version >> 8) & 0xFF);
437 minor = (version & 0xFF);
438 if (major != ECRYPTFS_VERSION_MAJOR) {
439 ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
440 "Expected [%d]; got [%d]\n",
441 ECRYPTFS_VERSION_MAJOR, major);
442 rc = -EINVAL;
443 goto out;
444 }
445 if (minor != ECRYPTFS_VERSION_MINOR) {
446 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
447 "Expected [%d]; got [%d]\n",
448 ECRYPTFS_VERSION_MINOR, minor);
449 rc = -EINVAL;
450 goto out;
451 }
452out:
453 return rc;
454}
455
456/**
457 * ecryptfs_verify_auth_tok_from_key
458 * @auth_tok_key: key containing the authentication token
459 * @auth_tok: authentication token
460 *
461 * Returns zero on valid auth tok; -EINVAL otherwise
462 */
463static int
464ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key,
465 struct ecryptfs_auth_tok **auth_tok)
466{
467 int rc = 0;
468
469 (*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key);
470 if (ecryptfs_verify_version((*auth_tok)->version)) {
471 printk(KERN_ERR "Data structure version mismatch. Userspace "
472 "tools must match eCryptfs kernel module with major "
473 "version [%d] and minor version [%d]\n",
474 ECRYPTFS_VERSION_MAJOR, ECRYPTFS_VERSION_MINOR);
475 rc = -EINVAL;
476 goto out;
477 }
478 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
479 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
480 printk(KERN_ERR "Invalid auth_tok structure "
481 "returned from key query\n");
482 rc = -EINVAL;
483 goto out;
484 }
485out:
486 return rc;
487}
488
406static int 489static int
407ecryptfs_find_global_auth_tok_for_sig( 490ecryptfs_find_global_auth_tok_for_sig(
408 struct ecryptfs_global_auth_tok **global_auth_tok, 491 struct key **auth_tok_key,
492 struct ecryptfs_auth_tok **auth_tok,
409 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig) 493 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
410{ 494{
411 struct ecryptfs_global_auth_tok *walker; 495 struct ecryptfs_global_auth_tok *walker;
412 int rc = 0; 496 int rc = 0;
413 497
414 (*global_auth_tok) = NULL; 498 (*auth_tok_key) = NULL;
499 (*auth_tok) = NULL;
415 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); 500 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
416 list_for_each_entry(walker, 501 list_for_each_entry(walker,
417 &mount_crypt_stat->global_auth_tok_list, 502 &mount_crypt_stat->global_auth_tok_list,
418 mount_crypt_stat_list) { 503 mount_crypt_stat_list) {
419 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { 504 if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX))
420 rc = key_validate(walker->global_auth_tok_key); 505 continue;
421 if (!rc) 506
422 (*global_auth_tok) = walker; 507 if (walker->flags & ECRYPTFS_AUTH_TOK_INVALID) {
508 rc = -EINVAL;
423 goto out; 509 goto out;
424 } 510 }
511
512 rc = key_validate(walker->global_auth_tok_key);
513 if (rc) {
514 if (rc == -EKEYEXPIRED)
515 goto out;
516 goto out_invalid_auth_tok;
517 }
518
519 down_write(&(walker->global_auth_tok_key->sem));
520 rc = ecryptfs_verify_auth_tok_from_key(
521 walker->global_auth_tok_key, auth_tok);
522 if (rc)
523 goto out_invalid_auth_tok_unlock;
524
525 (*auth_tok_key) = walker->global_auth_tok_key;
526 key_get(*auth_tok_key);
527 goto out;
425 } 528 }
426 rc = -EINVAL; 529 rc = -ENOENT;
530 goto out;
531out_invalid_auth_tok_unlock:
532 up_write(&(walker->global_auth_tok_key->sem));
533out_invalid_auth_tok:
534 printk(KERN_WARNING "Invalidating auth tok with sig = [%s]\n", sig);
535 walker->flags |= ECRYPTFS_AUTH_TOK_INVALID;
536 key_put(walker->global_auth_tok_key);
537 walker->global_auth_tok_key = NULL;
427out: 538out:
428 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 539 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
429 return rc; 540 return rc;
@@ -451,14 +562,11 @@ ecryptfs_find_auth_tok_for_sig(
451 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 562 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
452 char *sig) 563 char *sig)
453{ 564{
454 struct ecryptfs_global_auth_tok *global_auth_tok;
455 int rc = 0; 565 int rc = 0;
456 566
457 (*auth_tok_key) = NULL; 567 rc = ecryptfs_find_global_auth_tok_for_sig(auth_tok_key, auth_tok,
458 (*auth_tok) = NULL; 568 mount_crypt_stat, sig);
459 if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, 569 if (rc == -ENOENT) {
460 mount_crypt_stat, sig)) {
461
462 /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the 570 /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
463 * mount_crypt_stat structure, we prevent to use auth toks that 571 * mount_crypt_stat structure, we prevent to use auth toks that
464 * are not inserted through the ecryptfs_add_global_auth_tok 572 * are not inserted through the ecryptfs_add_global_auth_tok
@@ -470,8 +578,7 @@ ecryptfs_find_auth_tok_for_sig(
470 578
471 rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok, 579 rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
472 sig); 580 sig);
473 } else 581 }
474 (*auth_tok) = global_auth_tok->global_auth_tok;
475 return rc; 582 return rc;
476} 583}
477 584
@@ -531,6 +638,16 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
531 } 638 }
532 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 639 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
533 (*packet_size) = 0; 640 (*packet_size) = 0;
641 rc = ecryptfs_find_auth_tok_for_sig(
642 &auth_tok_key,
643 &s->auth_tok, mount_crypt_stat,
644 mount_crypt_stat->global_default_fnek_sig);
645 if (rc) {
646 printk(KERN_ERR "%s: Error attempting to find auth tok for "
647 "fnek sig [%s]; rc = [%d]\n", __func__,
648 mount_crypt_stat->global_default_fnek_sig, rc);
649 goto out;
650 }
534 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name( 651 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(
535 &s->desc.tfm, 652 &s->desc.tfm,
536 &s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name); 653 &s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name);
@@ -616,16 +733,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
616 goto out_free_unlock; 733 goto out_free_unlock;
617 } 734 }
618 dest[s->i++] = s->cipher_code; 735 dest[s->i++] = s->cipher_code;
619 rc = ecryptfs_find_auth_tok_for_sig(
620 &auth_tok_key,
621 &s->auth_tok, mount_crypt_stat,
622 mount_crypt_stat->global_default_fnek_sig);
623 if (rc) {
624 printk(KERN_ERR "%s: Error attempting to find auth tok for "
625 "fnek sig [%s]; rc = [%d]\n", __func__,
626 mount_crypt_stat->global_default_fnek_sig, rc);
627 goto out_free_unlock;
628 }
629 /* TODO: Support other key modules than passphrase for 736 /* TODO: Support other key modules than passphrase for
630 * filename encryption */ 737 * filename encryption */
631 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { 738 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
@@ -765,8 +872,10 @@ out_free_unlock:
765out_unlock: 872out_unlock:
766 mutex_unlock(s->tfm_mutex); 873 mutex_unlock(s->tfm_mutex);
767out: 874out:
768 if (auth_tok_key) 875 if (auth_tok_key) {
876 up_write(&(auth_tok_key->sem));
769 key_put(auth_tok_key); 877 key_put(auth_tok_key);
878 }
770 kfree(s); 879 kfree(s);
771 return rc; 880 return rc;
772} 881}
@@ -879,6 +988,15 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
879 __func__, s->cipher_code); 988 __func__, s->cipher_code);
880 goto out; 989 goto out;
881 } 990 }
991 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
992 &s->auth_tok, mount_crypt_stat,
993 s->fnek_sig_hex);
994 if (rc) {
995 printk(KERN_ERR "%s: Error attempting to find auth tok for "
996 "fnek sig [%s]; rc = [%d]\n", __func__, s->fnek_sig_hex,
997 rc);
998 goto out;
999 }
882 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm, 1000 rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm,
883 &s->tfm_mutex, 1001 &s->tfm_mutex,
884 s->cipher_string); 1002 s->cipher_string);
@@ -925,15 +1043,6 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
925 * >= ECRYPTFS_MAX_IV_BYTES. */ 1043 * >= ECRYPTFS_MAX_IV_BYTES. */
926 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES); 1044 memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
927 s->desc.info = s->iv; 1045 s->desc.info = s->iv;
928 rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
929 &s->auth_tok, mount_crypt_stat,
930 s->fnek_sig_hex);
931 if (rc) {
932 printk(KERN_ERR "%s: Error attempting to find auth tok for "
933 "fnek sig [%s]; rc = [%d]\n", __func__, s->fnek_sig_hex,
934 rc);
935 goto out_free_unlock;
936 }
937 /* TODO: Support other key modules than passphrase for 1046 /* TODO: Support other key modules than passphrase for
938 * filename encryption */ 1047 * filename encryption */
939 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) { 1048 if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
@@ -1002,8 +1111,10 @@ out:
1002 (*filename_size) = 0; 1111 (*filename_size) = 0;
1003 (*filename) = NULL; 1112 (*filename) = NULL;
1004 } 1113 }
1005 if (auth_tok_key) 1114 if (auth_tok_key) {
1115 up_write(&(auth_tok_key->sem));
1006 key_put(auth_tok_key); 1116 key_put(auth_tok_key);
1117 }
1007 kfree(s); 1118 kfree(s);
1008 return rc; 1119 return rc;
1009} 1120}
@@ -1520,38 +1631,6 @@ out:
1520 return rc; 1631 return rc;
1521} 1632}
1522 1633
1523/**
1524 * ecryptfs_verify_version
1525 * @version: The version number to confirm
1526 *
1527 * Returns zero on good version; non-zero otherwise
1528 */
1529static int ecryptfs_verify_version(u16 version)
1530{
1531 int rc = 0;
1532 unsigned char major;
1533 unsigned char minor;
1534
1535 major = ((version >> 8) & 0xFF);
1536 minor = (version & 0xFF);
1537 if (major != ECRYPTFS_VERSION_MAJOR) {
1538 ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
1539 "Expected [%d]; got [%d]\n",
1540 ECRYPTFS_VERSION_MAJOR, major);
1541 rc = -EINVAL;
1542 goto out;
1543 }
1544 if (minor != ECRYPTFS_VERSION_MINOR) {
1545 ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
1546 "Expected [%d]; got [%d]\n",
1547 ECRYPTFS_VERSION_MINOR, minor);
1548 rc = -EINVAL;
1549 goto out;
1550 }
1551out:
1552 return rc;
1553}
1554
1555int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, 1634int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
1556 struct ecryptfs_auth_tok **auth_tok, 1635 struct ecryptfs_auth_tok **auth_tok,
1557 char *sig) 1636 char *sig)
@@ -1563,31 +1642,16 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
1563 printk(KERN_ERR "Could not find key with description: [%s]\n", 1642 printk(KERN_ERR "Could not find key with description: [%s]\n",
1564 sig); 1643 sig);
1565 rc = process_request_key_err(PTR_ERR(*auth_tok_key)); 1644 rc = process_request_key_err(PTR_ERR(*auth_tok_key));
1645 (*auth_tok_key) = NULL;
1566 goto out; 1646 goto out;
1567 } 1647 }
1568 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); 1648 down_write(&(*auth_tok_key)->sem);
1569 if (ecryptfs_verify_version((*auth_tok)->version)) { 1649 rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok);
1570 printk(KERN_ERR
1571 "Data structure version mismatch. "
1572 "Userspace tools must match eCryptfs "
1573 "kernel module with major version [%d] "
1574 "and minor version [%d]\n",
1575 ECRYPTFS_VERSION_MAJOR,
1576 ECRYPTFS_VERSION_MINOR);
1577 rc = -EINVAL;
1578 goto out_release_key;
1579 }
1580 if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
1581 && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
1582 printk(KERN_ERR "Invalid auth_tok structure "
1583 "returned from key query\n");
1584 rc = -EINVAL;
1585 goto out_release_key;
1586 }
1587out_release_key:
1588 if (rc) { 1650 if (rc) {
1651 up_write(&(*auth_tok_key)->sem);
1589 key_put(*auth_tok_key); 1652 key_put(*auth_tok_key);
1590 (*auth_tok_key) = NULL; 1653 (*auth_tok_key) = NULL;
1654 goto out;
1591 } 1655 }
1592out: 1656out:
1593 return rc; 1657 return rc;
@@ -1809,6 +1873,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
1809find_next_matching_auth_tok: 1873find_next_matching_auth_tok:
1810 found_auth_tok = 0; 1874 found_auth_tok = 0;
1811 if (auth_tok_key) { 1875 if (auth_tok_key) {
1876 up_write(&(auth_tok_key->sem));
1812 key_put(auth_tok_key); 1877 key_put(auth_tok_key);
1813 auth_tok_key = NULL; 1878 auth_tok_key = NULL;
1814 } 1879 }
@@ -1895,8 +1960,10 @@ found_matching_auth_tok:
1895out_wipe_list: 1960out_wipe_list:
1896 wipe_auth_tok_list(&auth_tok_list); 1961 wipe_auth_tok_list(&auth_tok_list);
1897out: 1962out:
1898 if (auth_tok_key) 1963 if (auth_tok_key) {
1964 up_write(&(auth_tok_key->sem));
1899 key_put(auth_tok_key); 1965 key_put(auth_tok_key);
1966 }
1900 return rc; 1967 return rc;
1901} 1968}
1902 1969
@@ -2324,7 +2391,7 @@ ecryptfs_generate_key_packet_set(char *dest_base,
2324 size_t max) 2391 size_t max)
2325{ 2392{
2326 struct ecryptfs_auth_tok *auth_tok; 2393 struct ecryptfs_auth_tok *auth_tok;
2327 struct ecryptfs_global_auth_tok *global_auth_tok; 2394 struct key *auth_tok_key = NULL;
2328 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 2395 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
2329 &ecryptfs_superblock_to_private( 2396 &ecryptfs_superblock_to_private(
2330 ecryptfs_dentry->d_sb)->mount_crypt_stat; 2397 ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -2343,21 +2410,16 @@ ecryptfs_generate_key_packet_set(char *dest_base,
2343 list_for_each_entry(key_sig, &crypt_stat->keysig_list, 2410 list_for_each_entry(key_sig, &crypt_stat->keysig_list,
2344 crypt_stat_list) { 2411 crypt_stat_list) {
2345 memset(key_rec, 0, sizeof(*key_rec)); 2412 memset(key_rec, 0, sizeof(*key_rec));
2346 rc = ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, 2413 rc = ecryptfs_find_global_auth_tok_for_sig(&auth_tok_key,
2414 &auth_tok,
2347 mount_crypt_stat, 2415 mount_crypt_stat,
2348 key_sig->keysig); 2416 key_sig->keysig);
2349 if (rc) { 2417 if (rc) {
2350 printk(KERN_ERR "Error attempting to get the global " 2418 printk(KERN_WARNING "Unable to retrieve auth tok with "
2351 "auth_tok; rc = [%d]\n", rc); 2419 "sig = [%s]\n", key_sig->keysig);
2420 rc = process_find_global_auth_tok_for_sig_err(rc);
2352 goto out_free; 2421 goto out_free;
2353 } 2422 }
2354 if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID) {
2355 printk(KERN_WARNING
2356 "Skipping invalid auth tok with sig = [%s]\n",
2357 global_auth_tok->sig);
2358 continue;
2359 }
2360 auth_tok = global_auth_tok->global_auth_tok;
2361 if (auth_tok->token_type == ECRYPTFS_PASSWORD) { 2423 if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
2362 rc = write_tag_3_packet((dest_base + (*len)), 2424 rc = write_tag_3_packet((dest_base + (*len)),
2363 &max, auth_tok, 2425 &max, auth_tok,
@@ -2395,6 +2457,9 @@ ecryptfs_generate_key_packet_set(char *dest_base,
2395 rc = -EINVAL; 2457 rc = -EINVAL;
2396 goto out_free; 2458 goto out_free;
2397 } 2459 }
2460 up_write(&(auth_tok_key->sem));
2461 key_put(auth_tok_key);
2462 auth_tok_key = NULL;
2398 } 2463 }
2399 if (likely(max > 0)) { 2464 if (likely(max > 0)) {
2400 dest_base[(*len)] = 0x00; 2465 dest_base[(*len)] = 0x00;
@@ -2407,6 +2472,11 @@ out_free:
2407out: 2472out:
2408 if (rc) 2473 if (rc)
2409 (*len) = 0; 2474 (*len) = 0;
2475 if (auth_tok_key) {
2476 up_write(&(auth_tok_key->sem));
2477 key_put(auth_tok_key);
2478 }
2479
2410 mutex_unlock(&crypt_stat->keysig_list_mutex); 2480 mutex_unlock(&crypt_stat->keysig_list_mutex);
2411 return rc; 2481 return rc;
2412} 2482}
@@ -2424,6 +2494,7 @@ int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
2424 return -ENOMEM; 2494 return -ENOMEM;
2425 } 2495 }
2426 memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX); 2496 memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
2497 new_key_sig->keysig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
2427 /* Caller must hold keysig_list_mutex */ 2498 /* Caller must hold keysig_list_mutex */
2428 list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list); 2499 list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list);
2429 2500
@@ -2453,7 +2524,6 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
2453 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); 2524 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
2454 list_add(&new_auth_tok->mount_crypt_stat_list, 2525 list_add(&new_auth_tok->mount_crypt_stat_list,
2455 &mount_crypt_stat->global_auth_tok_list); 2526 &mount_crypt_stat->global_auth_tok_list);
2456 mount_crypt_stat->num_global_auth_toks++;
2457 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 2527 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
2458out: 2528out:
2459 return rc; 2529 return rc;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 758323a0f09a..c27c0ecf90bc 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -122,7 +122,6 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
122 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 122 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
123 int rc = 0; 123 int rc = 0;
124 124
125 mutex_lock(&inode_info->lower_file_mutex);
126 if (!inode_info->lower_file) { 125 if (!inode_info->lower_file) {
127 struct dentry *lower_dentry; 126 struct dentry *lower_dentry;
128 struct vfsmount *lower_mnt = 127 struct vfsmount *lower_mnt =
@@ -138,7 +137,6 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
138 inode_info->lower_file = NULL; 137 inode_info->lower_file = NULL;
139 } 138 }
140 } 139 }
141 mutex_unlock(&inode_info->lower_file_mutex);
142 return rc; 140 return rc;
143} 141}
144 142
@@ -241,14 +239,14 @@ static int ecryptfs_init_global_auth_toks(
241 struct ecryptfs_mount_crypt_stat *mount_crypt_stat) 239 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
242{ 240{
243 struct ecryptfs_global_auth_tok *global_auth_tok; 241 struct ecryptfs_global_auth_tok *global_auth_tok;
242 struct ecryptfs_auth_tok *auth_tok;
244 int rc = 0; 243 int rc = 0;
245 244
246 list_for_each_entry(global_auth_tok, 245 list_for_each_entry(global_auth_tok,
247 &mount_crypt_stat->global_auth_tok_list, 246 &mount_crypt_stat->global_auth_tok_list,
248 mount_crypt_stat_list) { 247 mount_crypt_stat_list) {
249 rc = ecryptfs_keyring_auth_tok_for_sig( 248 rc = ecryptfs_keyring_auth_tok_for_sig(
250 &global_auth_tok->global_auth_tok_key, 249 &global_auth_tok->global_auth_tok_key, &auth_tok,
251 &global_auth_tok->global_auth_tok,
252 global_auth_tok->sig); 250 global_auth_tok->sig);
253 if (rc) { 251 if (rc) {
254 printk(KERN_ERR "Could not find valid key in user " 252 printk(KERN_ERR "Could not find valid key in user "
@@ -256,8 +254,10 @@ static int ecryptfs_init_global_auth_toks(
256 "option: [%s]\n", global_auth_tok->sig); 254 "option: [%s]\n", global_auth_tok->sig);
257 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; 255 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
258 goto out; 256 goto out;
259 } else 257 } else {
260 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; 258 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
259 up_write(&(global_auth_tok->global_auth_tok_key)->sem);
260 }
261 } 261 }
262out: 262out:
263 return rc; 263 return rc;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index cc64fca89f8d..6a44148c5fb9 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -62,6 +62,18 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
62{ 62{
63 int rc; 63 int rc;
64 64
65 /*
66 * Refuse to write the page out if we are called from reclaim context
67 * since our writepage() path may potentially allocate memory when
68 * calling into the lower fs vfs_write() which may in turn invoke
69 * us again.
70 */
71 if (current->flags & PF_MEMALLOC) {
72 redirty_page_for_writepage(wbc, page);
73 rc = 0;
74 goto out;
75 }
76
65 rc = ecryptfs_encrypt_page(page); 77 rc = ecryptfs_encrypt_page(page);
66 if (rc) { 78 if (rc) {
67 ecryptfs_printk(KERN_WARNING, "Error encrypting " 79 ecryptfs_printk(KERN_WARNING, "Error encrypting "
@@ -70,8 +82,8 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
70 goto out; 82 goto out;
71 } 83 }
72 SetPageUptodate(page); 84 SetPageUptodate(page);
73 unlock_page(page);
74out: 85out:
86 unlock_page(page);
75 return rc; 87 return rc;
76} 88}
77 89
@@ -193,11 +205,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
193 &ecryptfs_inode_to_private(page->mapping->host)->crypt_stat; 205 &ecryptfs_inode_to_private(page->mapping->host)->crypt_stat;
194 int rc = 0; 206 int rc = 0;
195 207
196 if (!crypt_stat 208 if (!crypt_stat || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
197 || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
198 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
199 ecryptfs_printk(KERN_DEBUG,
200 "Passing through unencrypted page\n");
201 rc = ecryptfs_read_lower_page_segment(page, page->index, 0, 209 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
202 PAGE_CACHE_SIZE, 210 PAGE_CACHE_SIZE,
203 page->mapping->host); 211 page->mapping->host);
@@ -295,8 +303,7 @@ static int ecryptfs_write_begin(struct file *file,
295 struct ecryptfs_crypt_stat *crypt_stat = 303 struct ecryptfs_crypt_stat *crypt_stat =
296 &ecryptfs_inode_to_private(mapping->host)->crypt_stat; 304 &ecryptfs_inode_to_private(mapping->host)->crypt_stat;
297 305
298 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) 306 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
299 || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
300 rc = ecryptfs_read_lower_page_segment( 307 rc = ecryptfs_read_lower_page_segment(
301 page, index, 0, PAGE_CACHE_SIZE, mapping->host); 308 page, index, 0, PAGE_CACHE_SIZE, mapping->host);
302 if (rc) { 309 if (rc) {
@@ -374,6 +381,11 @@ static int ecryptfs_write_begin(struct file *file,
374 && (pos != 0)) 381 && (pos != 0))
375 zero_user(page, 0, PAGE_CACHE_SIZE); 382 zero_user(page, 0, PAGE_CACHE_SIZE);
376out: 383out:
384 if (unlikely(rc)) {
385 unlock_page(page);
386 page_cache_release(page);
387 *pagep = NULL;
388 }
377 return rc; 389 return rc;
378} 390}
379 391
@@ -486,13 +498,8 @@ static int ecryptfs_write_end(struct file *file,
486 struct ecryptfs_crypt_stat *crypt_stat = 498 struct ecryptfs_crypt_stat *crypt_stat =
487 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; 499 &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
488 int rc; 500 int rc;
501 int need_unlock_page = 1;
489 502
490 if (crypt_stat->flags & ECRYPTFS_NEW_FILE) {
491 ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
492 "crypt_stat at memory location [%p]\n", crypt_stat);
493 crypt_stat->flags &= ~(ECRYPTFS_NEW_FILE);
494 } else
495 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
496 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 503 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
497 "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); 504 "(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
498 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 505 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
@@ -512,26 +519,26 @@ static int ecryptfs_write_end(struct file *file,
512 "zeros in page with index = [0x%.16lx]\n", index); 519 "zeros in page with index = [0x%.16lx]\n", index);
513 goto out; 520 goto out;
514 } 521 }
515 rc = ecryptfs_encrypt_page(page); 522 set_page_dirty(page);
516 if (rc) { 523 unlock_page(page);
517 ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " 524 need_unlock_page = 0;
518 "index [0x%.16lx])\n", index);
519 goto out;
520 }
521 if (pos + copied > i_size_read(ecryptfs_inode)) { 525 if (pos + copied > i_size_read(ecryptfs_inode)) {
522 i_size_write(ecryptfs_inode, pos + copied); 526 i_size_write(ecryptfs_inode, pos + copied);
523 ecryptfs_printk(KERN_DEBUG, "Expanded file size to " 527 ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
524 "[0x%.16llx]\n", 528 "[0x%.16llx]\n",
525 (unsigned long long)i_size_read(ecryptfs_inode)); 529 (unsigned long long)i_size_read(ecryptfs_inode));
530 balance_dirty_pages_ratelimited(mapping);
531 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
532 if (rc) {
533 printk(KERN_ERR "Error writing inode size to metadata; "
534 "rc = [%d]\n", rc);
535 goto out;
536 }
526 } 537 }
527 rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); 538 rc = copied;
528 if (rc)
529 printk(KERN_ERR "Error writing inode size to metadata; "
530 "rc = [%d]\n", rc);
531 else
532 rc = copied;
533out: 539out:
534 unlock_page(page); 540 if (need_unlock_page)
541 unlock_page(page);
535 page_cache_release(page); 542 page_cache_release(page);
536 return rc; 543 return rc;
537} 544}
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index db184ef15d3d..85d430963116 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -44,15 +44,11 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
44 ssize_t rc; 44 ssize_t rc;
45 45
46 inode_info = ecryptfs_inode_to_private(ecryptfs_inode); 46 inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
47 mutex_lock(&inode_info->lower_file_mutex);
48 BUG_ON(!inode_info->lower_file); 47 BUG_ON(!inode_info->lower_file);
49 inode_info->lower_file->f_pos = offset;
50 fs_save = get_fs(); 48 fs_save = get_fs();
51 set_fs(get_ds()); 49 set_fs(get_ds());
52 rc = vfs_write(inode_info->lower_file, data, size, 50 rc = vfs_write(inode_info->lower_file, data, size, &offset);
53 &inode_info->lower_file->f_pos);
54 set_fs(fs_save); 51 set_fs(fs_save);
55 mutex_unlock(&inode_info->lower_file_mutex);
56 mark_inode_dirty_sync(ecryptfs_inode); 52 mark_inode_dirty_sync(ecryptfs_inode);
57 return rc; 53 return rc;
58} 54}
@@ -234,15 +230,11 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
234 mm_segment_t fs_save; 230 mm_segment_t fs_save;
235 ssize_t rc; 231 ssize_t rc;
236 232
237 mutex_lock(&inode_info->lower_file_mutex);
238 BUG_ON(!inode_info->lower_file); 233 BUG_ON(!inode_info->lower_file);
239 inode_info->lower_file->f_pos = offset;
240 fs_save = get_fs(); 234 fs_save = get_fs();
241 set_fs(get_ds()); 235 set_fs(get_ds());
242 rc = vfs_read(inode_info->lower_file, data, size, 236 rc = vfs_read(inode_info->lower_file, data, size, &offset);
243 &inode_info->lower_file->f_pos);
244 set_fs(fs_save); 237 set_fs(fs_save);
245 mutex_unlock(&inode_info->lower_file_mutex);
246 return rc; 238 return rc;
247} 239}
248 240
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 3042fe123a34..bacc882e1ae4 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -55,7 +55,6 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
55 if (unlikely(!inode_info)) 55 if (unlikely(!inode_info))
56 goto out; 56 goto out;
57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat); 57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
58 mutex_init(&inode_info->lower_file_mutex);
59 inode_info->lower_file = NULL; 58 inode_info->lower_file = NULL;
60 inode = &inode_info->vfs_inode; 59 inode = &inode_info->vfs_inode;
61out: 60out:
@@ -198,7 +197,7 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
198const struct super_operations ecryptfs_sops = { 197const struct super_operations ecryptfs_sops = {
199 .alloc_inode = ecryptfs_alloc_inode, 198 .alloc_inode = ecryptfs_alloc_inode,
200 .destroy_inode = ecryptfs_destroy_inode, 199 .destroy_inode = ecryptfs_destroy_inode,
201 .drop_inode = generic_delete_inode, 200 .drop_inode = generic_drop_inode,
202 .statfs = ecryptfs_statfs, 201 .statfs = ecryptfs_statfs,
203 .remount_fs = NULL, 202 .remount_fs = NULL,
204 .evict_inode = ecryptfs_evict_inode, 203 .evict_inode = ecryptfs_evict_inode,
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index a8e7797b9477..9c13412e6c99 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -23,7 +23,6 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block)
23} 23}
24static const struct address_space_operations efs_aops = { 24static const struct address_space_operations efs_aops = {
25 .readpage = efs_readpage, 25 .readpage = efs_readpage,
26 .sync_page = block_sync_page,
27 .bmap = _efs_bmap 26 .bmap = _efs_bmap
28}; 27};
29 28
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ff12f7ac73ef..ed38801b57a7 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -316,6 +316,19 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
316} 316}
317 317
318/** 318/**
319 * ep_events_available - Checks if ready events might be available.
320 *
321 * @ep: Pointer to the eventpoll context.
322 *
323 * Returns: Returns a value different than zero if ready events are available,
324 * or zero otherwise.
325 */
326static inline int ep_events_available(struct eventpoll *ep)
327{
328 return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
329}
330
331/**
319 * ep_call_nested - Perform a bound (possibly) nested call, by checking 332 * ep_call_nested - Perform a bound (possibly) nested call, by checking
320 * that the recursion limit is not exceeded, and that 333 * that the recursion limit is not exceeded, and that
321 * the same nested call (by the meaning of same cookie) is 334 * the same nested call (by the meaning of same cookie) is
@@ -1135,12 +1148,29 @@ static inline struct timespec ep_set_mstimeout(long ms)
1135 return timespec_add_safe(now, ts); 1148 return timespec_add_safe(now, ts);
1136} 1149}
1137 1150
1151/**
1152 * ep_poll - Retrieves ready events, and delivers them to the caller supplied
1153 * event buffer.
1154 *
1155 * @ep: Pointer to the eventpoll context.
1156 * @events: Pointer to the userspace buffer where the ready events should be
1157 * stored.
1158 * @maxevents: Size (in terms of number of events) of the caller event buffer.
1159 * @timeout: Maximum timeout for the ready events fetch operation, in
1160 * milliseconds. If the @timeout is zero, the function will not block,
1161 * while if the @timeout is less than zero, the function will block
1162 * until at least one event has been retrieved (or an error
1163 * occurred).
1164 *
1165 * Returns: Returns the number of ready events which have been fetched, or an
1166 * error code, in case of error.
1167 */
1138static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1168static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1139 int maxevents, long timeout) 1169 int maxevents, long timeout)
1140{ 1170{
1141 int res, eavail, timed_out = 0; 1171 int res = 0, eavail, timed_out = 0;
1142 unsigned long flags; 1172 unsigned long flags;
1143 long slack; 1173 long slack = 0;
1144 wait_queue_t wait; 1174 wait_queue_t wait;
1145 ktime_t expires, *to = NULL; 1175 ktime_t expires, *to = NULL;
1146 1176
@@ -1151,14 +1181,19 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1151 to = &expires; 1181 to = &expires;
1152 *to = timespec_to_ktime(end_time); 1182 *to = timespec_to_ktime(end_time);
1153 } else if (timeout == 0) { 1183 } else if (timeout == 0) {
1184 /*
1185 * Avoid the unnecessary trip to the wait queue loop, if the
1186 * caller specified a non blocking operation.
1187 */
1154 timed_out = 1; 1188 timed_out = 1;
1189 spin_lock_irqsave(&ep->lock, flags);
1190 goto check_events;
1155 } 1191 }
1156 1192
1157retry: 1193fetch_events:
1158 spin_lock_irqsave(&ep->lock, flags); 1194 spin_lock_irqsave(&ep->lock, flags);
1159 1195
1160 res = 0; 1196 if (!ep_events_available(ep)) {
1161 if (list_empty(&ep->rdllist)) {
1162 /* 1197 /*
1163 * We don't have any available event to return to the caller. 1198 * We don't have any available event to return to the caller.
1164 * We need to sleep here, and we will be wake up by 1199 * We need to sleep here, and we will be wake up by
@@ -1174,7 +1209,7 @@ retry:
1174 * to TASK_INTERRUPTIBLE before doing the checks. 1209 * to TASK_INTERRUPTIBLE before doing the checks.
1175 */ 1210 */
1176 set_current_state(TASK_INTERRUPTIBLE); 1211 set_current_state(TASK_INTERRUPTIBLE);
1177 if (!list_empty(&ep->rdllist) || timed_out) 1212 if (ep_events_available(ep) || timed_out)
1178 break; 1213 break;
1179 if (signal_pending(current)) { 1214 if (signal_pending(current)) {
1180 res = -EINTR; 1215 res = -EINTR;
@@ -1191,8 +1226,9 @@ retry:
1191 1226
1192 set_current_state(TASK_RUNNING); 1227 set_current_state(TASK_RUNNING);
1193 } 1228 }
1229check_events:
1194 /* Is it worth to try to dig for events ? */ 1230 /* Is it worth to try to dig for events ? */
1195 eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; 1231 eavail = ep_events_available(ep);
1196 1232
1197 spin_unlock_irqrestore(&ep->lock, flags); 1233 spin_unlock_irqrestore(&ep->lock, flags);
1198 1234
@@ -1203,7 +1239,7 @@ retry:
1203 */ 1239 */
1204 if (!res && eavail && 1240 if (!res && eavail &&
1205 !(res = ep_send_events(ep, events, maxevents)) && !timed_out) 1241 !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
1206 goto retry; 1242 goto fetch_events;
1207 1243
1208 return res; 1244 return res;
1209} 1245}
diff --git a/fs/exec.c b/fs/exec.c
index ba99e1abb1aa..5e62d26a4fec 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1875,7 +1875,7 @@ static void wait_for_dump_helpers(struct file *file)
1875 1875
1876 1876
1877/* 1877/*
1878 * uhm_pipe_setup 1878 * umh_pipe_setup
1879 * helper function to customize the process used 1879 * helper function to customize the process used
1880 * to collect the core in userspace. Specifically 1880 * to collect the core in userspace. Specifically
1881 * it sets up a pipe and installs it as fd 0 (stdin) 1881 * it sets up a pipe and installs it as fd 0 (stdin)
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index f0d520312d8b..5e74ad3d4009 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -53,10 +53,14 @@
53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
54 54
55/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
56/* Inode attrs */
56# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) 57# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
57# define EXOFS_ATTR_INODE_DATA 1 58# define EXOFS_ATTR_INODE_DATA 1
58# define EXOFS_ATTR_INODE_FILE_LAYOUT 2 59# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
59# define EXOFS_ATTR_INODE_DIR_LAYOUT 3 60# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
61/* Partition attrs */
62# define EXOFS_APAGE_SB_DATA (0xF0000000U + 3)
63# define EXOFS_ATTR_SB_STATS 1
60 64
61/* 65/*
62 * The maximum number of files we can have is limited by the size of the 66 * The maximum number of files we can have is limited by the size of the
@@ -86,8 +90,8 @@ enum {
86 */ 90 */
87enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; 91enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
88struct exofs_fscb { 92struct exofs_fscb {
89 __le64 s_nextid; /* Highest object ID used */ 93 __le64 s_nextid; /* Only used after mkfs */
90 __le64 s_numfiles; /* Number of files on fs */ 94 __le64 s_numfiles; /* Only used after mkfs */
91 __le32 s_version; /* == EXOFS_FSCB_VER */ 95 __le32 s_version; /* == EXOFS_FSCB_VER */
92 __le16 s_magic; /* Magic signature */ 96 __le16 s_magic; /* Magic signature */
93 __le16 s_newfs; /* Non-zero if this is a new fs */ 97 __le16 s_newfs; /* Non-zero if this is a new fs */
@@ -98,6 +102,16 @@ struct exofs_fscb {
98} __packed; 102} __packed;
99 103
100/* 104/*
105 * This struct is set on the FS partition's attributes.
106 * [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] and is written together
107 * with the create command, to atomically persist the sb writeable information.
108 */
109struct exofs_sb_stats {
110 __le64 s_nextid; /* Highest object ID used */
111 __le64 s_numfiles; /* Number of files on fs */
112} __packed;
113
114/*
101 * Describes the raid used in the FS. It is part of the device table. 115 * Describes the raid used in the FS. It is part of the device table.
102 * This here is taken from the pNFS-objects definition. In exofs we 116 * This here is taken from the pNFS-objects definition. In exofs we
103 * use one raid policy through-out the filesystem. (NOTE: the funny 117 * use one raid policy through-out the filesystem. (NOTE: the funny
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index dcc941d82d67..d0941c6a1f72 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -124,7 +124,7 @@ out:
124 124
125Ebadsize: 125Ebadsize:
126 EXOFS_ERR("ERROR [exofs_check_page]: " 126 EXOFS_ERR("ERROR [exofs_check_page]: "
127 "size of directory #%lu is not a multiple of chunk size", 127 "size of directory(0x%lx) is not a multiple of chunk size\n",
128 dir->i_ino 128 dir->i_ino
129 ); 129 );
130 goto fail; 130 goto fail;
@@ -142,8 +142,8 @@ Espan:
142 goto bad_entry; 142 goto bad_entry;
143bad_entry: 143bad_entry:
144 EXOFS_ERR( 144 EXOFS_ERR(
145 "ERROR [exofs_check_page]: bad entry in directory #%lu: %s - " 145 "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
146 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d", 146 "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, 147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
148 _LLU(le64_to_cpu(p->inode_no)), 148 _LLU(le64_to_cpu(p->inode_no)),
149 rec_len, p->name_len); 149 rec_len, p->name_len);
@@ -151,8 +151,8 @@ bad_entry:
151Eend: 151Eend:
152 p = (struct exofs_dir_entry *)(kaddr + offs); 152 p = (struct exofs_dir_entry *)(kaddr + offs);
153 EXOFS_ERR("ERROR [exofs_check_page]: " 153 EXOFS_ERR("ERROR [exofs_check_page]: "
154 "entry in directory #%lu spans the page boundary" 154 "entry in directory(0x%lx) spans the page boundary"
155 "offset=%lu, inode=%llu", 155 "offset=%lu, inode=0x%llx\n",
156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, 156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
157 _LLU(le64_to_cpu(p->inode_no))); 157 _LLU(le64_to_cpu(p->inode_no)));
158fail: 158fail:
@@ -261,9 +261,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
261 struct page *page = exofs_get_page(inode, n); 261 struct page *page = exofs_get_page(inode, n);
262 262
263 if (IS_ERR(page)) { 263 if (IS_ERR(page)) {
264 EXOFS_ERR("ERROR: " 264 EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
265 "bad page in #%lu", 265 inode->i_ino);
266 inode->i_ino);
267 filp->f_pos += PAGE_CACHE_SIZE - offset; 266 filp->f_pos += PAGE_CACHE_SIZE - offset;
268 return PTR_ERR(page); 267 return PTR_ERR(page);
269 } 268 }
@@ -283,7 +282,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
283 for (; (char *)de <= limit; de = exofs_next_entry(de)) { 282 for (; (char *)de <= limit; de = exofs_next_entry(de)) {
284 if (de->rec_len == 0) { 283 if (de->rec_len == 0) {
285 EXOFS_ERR("ERROR: " 284 EXOFS_ERR("ERROR: "
286 "zero-length directory entry"); 285 "zero-length entry in directory(0x%lx)\n",
286 inode->i_ino);
287 exofs_put_page(page); 287 exofs_put_page(page);
288 return -EIO; 288 return -EIO;
289 } 289 }
@@ -342,9 +342,9 @@ struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
342 kaddr += exofs_last_byte(dir, n) - reclen; 342 kaddr += exofs_last_byte(dir, n) - reclen;
343 while ((char *) de <= kaddr) { 343 while ((char *) de <= kaddr) {
344 if (de->rec_len == 0) { 344 if (de->rec_len == 0) {
345 EXOFS_ERR( 345 EXOFS_ERR("ERROR: zero-length entry in "
346 "ERROR: exofs_find_entry: " 346 "directory(0x%lx)\n",
347 "zero-length directory entry"); 347 dir->i_ino);
348 exofs_put_page(page); 348 exofs_put_page(page);
349 goto out; 349 goto out;
350 } 350 }
@@ -472,7 +472,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
472 } 472 }
473 if (de->rec_len == 0) { 473 if (de->rec_len == 0) {
474 EXOFS_ERR("ERROR: exofs_add_link: " 474 EXOFS_ERR("ERROR: exofs_add_link: "
475 "zero-length directory entry"); 475 "zero-length entry in directory(0x%lx)\n",
476 inode->i_ino);
476 err = -EIO; 477 err = -EIO;
477 goto out_unlock; 478 goto out_unlock;
478 } 479 }
@@ -491,7 +492,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
491 exofs_put_page(page); 492 exofs_put_page(page);
492 } 493 }
493 494
494 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode); 495 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=0x%lx\n",
496 dentry, inode->i_ino);
495 return -EINVAL; 497 return -EINVAL;
496 498
497got_it: 499got_it:
@@ -542,7 +544,8 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
542 while (de < dir) { 544 while (de < dir) {
543 if (de->rec_len == 0) { 545 if (de->rec_len == 0) {
544 EXOFS_ERR("ERROR: exofs_delete_entry:" 546 EXOFS_ERR("ERROR: exofs_delete_entry:"
545 "zero-length directory entry"); 547 "zero-length entry in directory(0x%lx)\n",
548 inode->i_ino);
546 err = -EIO; 549 err = -EIO;
547 goto out; 550 goto out;
548 } 551 }
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 2dc925fa1010..c965806c2821 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -77,7 +77,7 @@ struct exofs_layout {
77 * our extension to the in-memory superblock 77 * our extension to the in-memory superblock
78 */ 78 */
79struct exofs_sb_info { 79struct exofs_sb_info {
80 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ 80 struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
81 int s_timeout; /* timeout for OSD operations */ 81 int s_timeout; /* timeout for OSD operations */
82 uint64_t s_nextid; /* highest object ID used */ 82 uint64_t s_nextid; /* highest object ID used */
83 uint32_t s_numfiles; /* number of files on fs */ 83 uint32_t s_numfiles; /* number of files on fs */
@@ -256,6 +256,8 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
256} 256}
257 257
258/* inode.c */ 258/* inode.c */
259unsigned exofs_max_io_pages(struct exofs_layout *layout,
260 unsigned expected_pages);
259int exofs_setattr(struct dentry *, struct iattr *); 261int exofs_setattr(struct dentry *, struct iattr *);
260int exofs_write_begin(struct file *file, struct address_space *mapping, 262int exofs_write_begin(struct file *file, struct address_space *mapping,
261 loff_t pos, unsigned len, unsigned flags, 263 loff_t pos, unsigned len, unsigned flags,
@@ -279,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
279 struct inode *); 281 struct inode *);
280 282
281/* super.c */ 283/* super.c */
282int exofs_sync_fs(struct super_block *sb, int wait); 284int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
283 285
284/********************* 286/*********************
285 * operation vectors * 287 * operation vectors *
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index b905c79b4f0a..45ca323d8363 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -45,22 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
45static int exofs_file_fsync(struct file *filp, int datasync) 45static int exofs_file_fsync(struct file *filp, int datasync)
46{ 46{
47 int ret; 47 int ret;
48 struct inode *inode = filp->f_mapping->host;
49 struct super_block *sb;
50
51 if (!(inode->i_state & I_DIRTY))
52 return 0;
53 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
54 return 0;
55
56 ret = sync_inode_metadata(inode, 1);
57
58 /* This is a good place to write the sb */
59 /* TODO: Sechedule an sb-sync on create */
60 sb = inode->i_sb;
61 if (sb->s_dirt)
62 exofs_sync_fs(sb, 1);
63 48
49 ret = sync_inode_metadata(filp->f_mapping->host, 1);
64 return ret; 50 return ret;
65} 51}
66 52
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a7555238c41a..8472c098445d 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
43 PAGE_SIZE / sizeof(struct page *), 43 PAGE_SIZE / sizeof(struct page *),
44}; 44};
45 45
46unsigned exofs_max_io_pages(struct exofs_layout *layout,
47 unsigned expected_pages)
48{
49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
50
51 /* TODO: easily support bio chaining */
52 pages = min_t(unsigned, pages,
53 layout->group_width * BIO_MAX_PAGES_KMALLOC);
54 return pages;
55}
56
46struct page_collect { 57struct page_collect {
47 struct exofs_sb_info *sbi; 58 struct exofs_sb_info *sbi;
48 struct inode *inode; 59 struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
97 108
98static int pcol_try_alloc(struct page_collect *pcol) 109static int pcol_try_alloc(struct page_collect *pcol)
99{ 110{
100 unsigned pages = min_t(unsigned, pcol->expected_pages, 111 unsigned pages;
101 MAX_PAGES_KMALLOC);
102 112
103 if (!pcol->ios) { /* First time allocate io_state */ 113 if (!pcol->ios) { /* First time allocate io_state */
104 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); 114 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
108 } 118 }
109 119
110 /* TODO: easily support bio chaining */ 120 /* TODO: easily support bio chaining */
111 pages = min_t(unsigned, pages, 121 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
112 pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
113 122
114 for (; pages; pages >>= 1) { 123 for (; pages; pages >>= 1) {
115 pcol->pages = kmalloc(pages * sizeof(struct page *), 124 pcol->pages = kmalloc(pages * sizeof(struct page *),
@@ -350,8 +359,10 @@ static int readpage_strip(void *data, struct page *page)
350 359
351 if (!pcol->read_4_write) 360 if (!pcol->read_4_write)
352 unlock_page(page); 361 unlock_page(page);
353 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 362 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
354 " splitting\n", inode->i_ino, page->index); 363 "read_4_write=%d index=0x%lx end_index=0x%lx "
364 "splitting\n", inode->i_ino, len,
365 pcol->read_4_write, page->index, end_index);
355 366
356 return read_exec(pcol); 367 return read_exec(pcol);
357 } 368 }
@@ -722,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
722 733
723 /* read modify write */ 734 /* read modify write */
724 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 735 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
736 loff_t i_size = i_size_read(mapping->host);
737 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
738 size_t rlen;
739
740 if (page->index < end_index)
741 rlen = PAGE_CACHE_SIZE;
742 else if (page->index == end_index)
743 rlen = i_size & ~PAGE_CACHE_MASK;
744 else
745 rlen = 0;
746
747 if (!rlen) {
748 clear_highpage(page);
749 SetPageUptodate(page);
750 goto out;
751 }
752
725 ret = _readpage(page, true); 753 ret = _readpage(page, true);
726 if (ret) { 754 if (ret) {
727 /*SetPageError was done by _readpage. Is it ok?*/ 755 /*SetPageError was done by _readpage. Is it ok?*/
728 unlock_page(page); 756 unlock_page(page);
729 EXOFS_DBGMSG("__readpage_filler failed\n"); 757 EXOFS_DBGMSG("__readpage failed\n");
730 } 758 }
731 } 759 }
732out: 760out:
@@ -795,7 +823,6 @@ const struct address_space_operations exofs_aops = {
795 .direct_IO = NULL, /* TODO: Should be trivial to do */ 823 .direct_IO = NULL, /* TODO: Should be trivial to do */
796 824
797 /* With these NULL has special meaning or default is not exported */ 825 /* With these NULL has special meaning or default is not exported */
798 .sync_page = NULL,
799 .get_xip_mem = NULL, 826 .get_xip_mem = NULL,
800 .migratepage = NULL, 827 .migratepage = NULL,
801 .launder_page = NULL, 828 .launder_page = NULL,
@@ -1030,6 +1057,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1057 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1031 } 1058 }
1032 1059
1060 inode->i_mapping->backing_dev_info = sb->s_bdi;
1033 if (S_ISREG(inode->i_mode)) { 1061 if (S_ISREG(inode->i_mode)) {
1034 inode->i_op = &exofs_file_inode_operations; 1062 inode->i_op = &exofs_file_inode_operations;
1035 inode->i_fop = &exofs_file_operations; 1063 inode->i_fop = &exofs_file_operations;
@@ -1073,6 +1101,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
1073 } 1101 }
1074 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1102 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1075} 1103}
1104
1076/* 1105/*
1077 * Callback function from exofs_new_inode(). The important thing is that we 1106 * Callback function from exofs_new_inode(). The important thing is that we
1078 * set the obj_created flag so that other methods know that the object exists on 1107 * set the obj_created flag so that other methods know that the object exists on
@@ -1130,7 +1159,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1130 1159
1131 sbi = sb->s_fs_info; 1160 sbi = sb->s_fs_info;
1132 1161
1133 sb->s_dirt = 1; 1162 inode->i_mapping->backing_dev_info = sb->s_bdi;
1134 inode_init_owner(inode, dir, mode); 1163 inode_init_owner(inode, dir, mode);
1135 inode->i_ino = sbi->s_nextid++; 1164 inode->i_ino = sbi->s_nextid++;
1136 inode->i_blkbits = EXOFS_BLKSHIFT; 1165 inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1141,6 +1170,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1141 spin_unlock(&sbi->s_next_gen_lock); 1170 spin_unlock(&sbi->s_next_gen_lock);
1142 insert_inode_hash(inode); 1171 insert_inode_hash(inode);
1143 1172
1173 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
1174
1144 mark_inode_dirty(inode); 1175 mark_inode_dirty(inode);
1145 1176
1146 ret = exofs_get_io_state(&sbi->layout, &ios); 1177 ret = exofs_get_io_state(&sbi->layout, &ios);
@@ -1271,7 +1302,8 @@ out:
1271 1302
1272int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1303int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
1273{ 1304{
1274 return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 1305 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
1306 return exofs_update_inode(inode, 1);
1275} 1307}
1276 1308
1277/* 1309/*
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8c6c4669b381..06065bd37fc3 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -48,6 +48,7 @@
48 * struct to hold what we get from mount options 48 * struct to hold what we get from mount options
49 */ 49 */
50struct exofs_mountopt { 50struct exofs_mountopt {
51 bool is_osdname;
51 const char *dev_name; 52 const char *dev_name;
52 uint64_t pid; 53 uint64_t pid;
53 int timeout; 54 int timeout;
@@ -56,7 +57,7 @@ struct exofs_mountopt {
56/* 57/*
57 * exofs-specific mount-time options. 58 * exofs-specific mount-time options.
58 */ 59 */
59enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; 60enum { Opt_name, Opt_pid, Opt_to, Opt_err };
60 61
61/* 62/*
62 * Our mount-time options. These should ideally be 64-bit unsigned, but the 63 * Our mount-time options. These should ideally be 64-bit unsigned, but the
@@ -64,6 +65,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
64 * sufficient for most applications now. 65 * sufficient for most applications now.
65 */ 66 */
66static match_table_t tokens = { 67static match_table_t tokens = {
68 {Opt_name, "osdname=%s"},
67 {Opt_pid, "pid=%u"}, 69 {Opt_pid, "pid=%u"},
68 {Opt_to, "to=%u"}, 70 {Opt_to, "to=%u"},
69 {Opt_err, NULL} 71 {Opt_err, NULL}
@@ -94,6 +96,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
94 96
95 token = match_token(p, tokens, args); 97 token = match_token(p, tokens, args);
96 switch (token) { 98 switch (token) {
99 case Opt_name:
100 opts->dev_name = match_strdup(&args[0]);
101 if (unlikely(!opts->dev_name)) {
102 EXOFS_ERR("Error allocating dev_name");
103 return -ENOMEM;
104 }
105 opts->is_osdname = true;
106 break;
97 case Opt_pid: 107 case Opt_pid:
98 if (0 == match_strlcpy(str, &args[0], sizeof(str))) 108 if (0 == match_strlcpy(str, &args[0], sizeof(str)))
99 return -EINVAL; 109 return -EINVAL;
@@ -203,6 +213,101 @@ static void destroy_inodecache(void)
203static const struct super_operations exofs_sops; 213static const struct super_operations exofs_sops;
204static const struct export_operations exofs_export_ops; 214static const struct export_operations exofs_export_ops;
205 215
216static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
217 EXOFS_APAGE_SB_DATA,
218 EXOFS_ATTR_SB_STATS,
219 sizeof(struct exofs_sb_stats));
220
221static int __sbi_read_stats(struct exofs_sb_info *sbi)
222{
223 struct osd_attr attrs[] = {
224 [0] = g_attr_sb_stats,
225 };
226 struct exofs_io_state *ios;
227 int ret;
228
229 ret = exofs_get_io_state(&sbi->layout, &ios);
230 if (unlikely(ret)) {
231 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
232 return ret;
233 }
234
235 ios->cred = sbi->s_cred;
236
237 ios->in_attr = attrs;
238 ios->in_attr_len = ARRAY_SIZE(attrs);
239
240 ret = exofs_sbi_read(ios);
241 if (unlikely(ret)) {
242 EXOFS_ERR("Error reading super_block stats => %d\n", ret);
243 goto out;
244 }
245
246 ret = extract_attr_from_ios(ios, &attrs[0]);
247 if (ret) {
248 EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__);
249 goto out;
250 }
251 if (attrs[0].len) {
252 struct exofs_sb_stats *ess;
253
254 if (unlikely(attrs[0].len != sizeof(*ess))) {
255 EXOFS_ERR("%s: Wrong version of exofs_sb_stats "
256 "size(%d) != expected(%zd)\n",
257 __func__, attrs[0].len, sizeof(*ess));
258 goto out;
259 }
260
261 ess = attrs[0].val_ptr;
262 sbi->s_nextid = le64_to_cpu(ess->s_nextid);
263 sbi->s_numfiles = le32_to_cpu(ess->s_numfiles);
264 }
265
266out:
267 exofs_put_io_state(ios);
268 return ret;
269}
270
271static void stats_done(struct exofs_io_state *ios, void *p)
272{
273 exofs_put_io_state(ios);
274 /* Good thanks nothing to do anymore */
275}
276
277/* Asynchronously write the stats attribute */
278int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
279{
280 struct osd_attr attrs[] = {
281 [0] = g_attr_sb_stats,
282 };
283 struct exofs_io_state *ios;
284 int ret;
285
286 ret = exofs_get_io_state(&sbi->layout, &ios);
287 if (unlikely(ret)) {
288 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
289 return ret;
290 }
291
292 sbi->s_ess.s_nextid = cpu_to_le64(sbi->s_nextid);
293 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
294 attrs[0].val_ptr = &sbi->s_ess;
295
296 ios->cred = sbi->s_cred;
297 ios->done = stats_done;
298 ios->private = sbi;
299 ios->out_attr = attrs;
300 ios->out_attr_len = ARRAY_SIZE(attrs);
301
302 ret = exofs_sbi_write(ios);
303 if (unlikely(ret)) {
304 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
305 exofs_put_io_state(ios);
306 }
307
308 return ret;
309}
310
206/* 311/*
207 * Write the superblock to the OSD 312 * Write the superblock to the OSD
208 */ 313 */
@@ -213,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait)
213 struct exofs_io_state *ios; 318 struct exofs_io_state *ios;
214 int ret = -ENOMEM; 319 int ret = -ENOMEM;
215 320
216 lock_super(sb); 321 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
322 if (unlikely(!fscb))
323 return -ENOMEM;
324
217 sbi = sb->s_fs_info; 325 sbi = sb->s_fs_info;
218 fscb = &sbi->s_fscb;
219 326
327 /* NOTE: We no longer dirty the super_block anywhere in exofs. The
328 * reason we write the fscb here on unmount is so we can stay backwards
329 * compatible with fscb->s_version == 1. (What we are not compatible
330 * with is if a new version FS crashed and then we try to mount an old
331 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
332 * the writeable info is set in exofs_sbi_write_stats() above.
333 */
220 ret = exofs_get_io_state(&sbi->layout, &ios); 334 ret = exofs_get_io_state(&sbi->layout, &ios);
221 if (ret) 335 if (unlikely(ret))
222 goto out; 336 goto out;
223 337
224 /* Note: We only write the changing part of the fscb. .i.e upto the 338 lock_super(sb);
225 * the fscb->s_dev_table_oid member. There is no read-modify-write 339
226 * here.
227 */
228 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); 340 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
229 memset(fscb, 0, ios->length); 341 memset(fscb, 0, ios->length);
230 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 342 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -239,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait)
239 ios->cred = sbi->s_cred; 351 ios->cred = sbi->s_cred;
240 352
241 ret = exofs_sbi_write(ios); 353 ret = exofs_sbi_write(ios);
242 if (unlikely(ret)) { 354 if (unlikely(ret))
243 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 355 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
244 goto out; 356 else
245 } 357 sb->s_dirt = 0;
246 sb->s_dirt = 0; 358
247 359
360 unlock_super(sb);
248out: 361out:
249 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 362 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
250 exofs_put_io_state(ios); 363 exofs_put_io_state(ios);
251 unlock_super(sb); 364 kfree(fscb);
252 return ret; 365 return ret;
253} 366}
254 367
@@ -292,13 +405,14 @@ static void exofs_put_super(struct super_block *sb)
292 int num_pend; 405 int num_pend;
293 struct exofs_sb_info *sbi = sb->s_fs_info; 406 struct exofs_sb_info *sbi = sb->s_fs_info;
294 407
295 if (sb->s_dirt)
296 exofs_write_super(sb);
297
298 /* make sure there are no pending commands */ 408 /* make sure there are no pending commands */
299 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; 409 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
300 num_pend = atomic_read(&sbi->s_curr_pending)) { 410 num_pend = atomic_read(&sbi->s_curr_pending)) {
301 wait_queue_head_t wq; 411 wait_queue_head_t wq;
412
413 printk(KERN_NOTICE "%s: !!Pending operations in flight. "
414 "This is a BUG. please report to osd-dev@open-osd.org\n",
415 __func__);
302 init_waitqueue_head(&wq); 416 init_waitqueue_head(&wq);
303 wait_event_timeout(wq, 417 wait_event_timeout(wq,
304 (atomic_read(&sbi->s_curr_pending) == 0), 418 (atomic_read(&sbi->s_curr_pending) == 0),
@@ -390,6 +504,23 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
390 return 0; 504 return 0;
391} 505}
392 506
507static unsigned __ra_pages(struct exofs_layout *layout)
508{
509 const unsigned _MIN_RA = 32; /* min 128K read-ahead */
510 unsigned ra_pages = layout->group_width * layout->stripe_unit /
511 PAGE_SIZE;
512 unsigned max_io_pages = exofs_max_io_pages(layout, ~0);
513
514 ra_pages *= 2; /* two stripes */
515 if (ra_pages < _MIN_RA)
516 ra_pages = roundup(_MIN_RA, ra_pages / 2);
517
518 if (ra_pages > max_io_pages)
519 ra_pages = max_io_pages;
520
521 return ra_pages;
522}
523
393/* @odi is valid only as long as @fscb_dev is valid */ 524/* @odi is valid only as long as @fscb_dev is valid */
394static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, 525static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
395 struct osd_dev_info *odi) 526 struct osd_dev_info *odi)
@@ -495,7 +626,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
495 } 626 }
496 627
497 od = osduld_info_lookup(&odi); 628 od = osduld_info_lookup(&odi);
498 if (unlikely(IS_ERR(od))) { 629 if (IS_ERR(od)) {
499 ret = PTR_ERR(od); 630 ret = PTR_ERR(od);
500 EXOFS_ERR("ERROR: device requested is not found " 631 EXOFS_ERR("ERROR: device requested is not found "
501 "osd_name-%s =>%d\n", odi.osdname, ret); 632 "osd_name-%s =>%d\n", odi.osdname, ret);
@@ -558,9 +689,17 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
558 goto free_bdi; 689 goto free_bdi;
559 690
560 /* use mount options to fill superblock */ 691 /* use mount options to fill superblock */
561 od = osduld_path_lookup(opts->dev_name); 692 if (opts->is_osdname) {
693 struct osd_dev_info odi = {.systemid_len = 0};
694
695 odi.osdname_len = strlen(opts->dev_name);
696 odi.osdname = (u8 *)opts->dev_name;
697 od = osduld_info_lookup(&odi);
698 } else {
699 od = osduld_path_lookup(opts->dev_name);
700 }
562 if (IS_ERR(od)) { 701 if (IS_ERR(od)) {
563 ret = PTR_ERR(od); 702 ret = -EINVAL;
564 goto free_sbi; 703 goto free_sbi;
565 } 704 }
566 705
@@ -594,6 +733,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
594 goto free_sbi; 733 goto free_sbi;
595 734
596 sb->s_magic = le16_to_cpu(fscb.s_magic); 735 sb->s_magic = le16_to_cpu(fscb.s_magic);
736 /* NOTE: we read below to be backward compatible with old versions */
597 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); 737 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
598 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); 738 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
599 739
@@ -604,7 +744,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
604 ret = -EINVAL; 744 ret = -EINVAL;
605 goto free_sbi; 745 goto free_sbi;
606 } 746 }
607 if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { 747 if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {
608 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", 748 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
609 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); 749 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
610 ret = -EINVAL; 750 ret = -EINVAL;
@@ -622,7 +762,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
622 goto free_sbi; 762 goto free_sbi;
623 } 763 }
624 764
765 __sbi_read_stats(sbi);
766
625 /* set up operation vectors */ 767 /* set up operation vectors */
768 sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
626 sb->s_bdi = &sbi->bdi; 769 sb->s_bdi = &sbi->bdi;
627 sb->s_fs_info = sbi; 770 sb->s_fs_info = sbi;
628 sb->s_op = &exofs_sops; 771 sb->s_op = &exofs_sops;
@@ -652,6 +795,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
652 795
653 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 796 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
654 sbi->layout.s_pid); 797 sbi->layout.s_pid);
798 if (opts->is_osdname)
799 kfree(opts->dev_name);
655 return 0; 800 return 0;
656 801
657free_sbi: 802free_sbi:
@@ -660,6 +805,8 @@ free_bdi:
660 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 805 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
661 opts->dev_name, sbi->layout.s_pid, ret); 806 opts->dev_name, sbi->layout.s_pid, ret);
662 exofs_free_sbi(sbi); 807 exofs_free_sbi(sbi);
808 if (opts->is_osdname)
809 kfree(opts->dev_name);
663 return ret; 810 return ret;
664} 811}
665 812
@@ -677,7 +824,8 @@ static struct dentry *exofs_mount(struct file_system_type *type,
677 if (ret) 824 if (ret)
678 return ERR_PTR(ret); 825 return ERR_PTR(ret);
679 826
680 opts.dev_name = dev_name; 827 if (!opts.dev_name)
828 opts.dev_name = dev_name;
681 return mount_nodev(type, flags, &opts, exofs_fill_super); 829 return mount_nodev(type, flags, &opts, exofs_fill_super);
682} 830}
683 831
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 7b4180554a62..abea5a17c764 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -406,7 +406,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
406 return -EINVAL; 406 return -EINVAL;
407 if (!test_opt(dentry->d_sb, POSIX_ACL)) 407 if (!test_opt(dentry->d_sb, POSIX_ACL))
408 return -EOPNOTSUPP; 408 return -EOPNOTSUPP;
409 if (!is_owner_or_cap(dentry->d_inode)) 409 if (!inode_owner_or_capable(dentry->d_inode))
410 return -EPERM; 410 return -EPERM;
411 411
412 if (value) { 412 if (value) {
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 1b48c3370872..645be9e7ee47 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -174,3 +174,9 @@ ext2_group_first_block_no(struct super_block *sb, unsigned long group_no)
174 return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + 174 return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) +
175 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); 175 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block);
176} 176}
177
178#define ext2_set_bit __test_and_set_bit_le
179#define ext2_clear_bit __test_and_clear_bit_le
180#define ext2_test_bit test_bit_le
181#define ext2_find_first_zero_bit find_first_zero_bit_le
182#define ext2_find_next_zero_bit find_next_zero_bit_le
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 40ad210a5049..c47f706878b5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -860,7 +860,6 @@ const struct address_space_operations ext2_aops = {
860 .readpage = ext2_readpage, 860 .readpage = ext2_readpage,
861 .readpages = ext2_readpages, 861 .readpages = ext2_readpages,
862 .writepage = ext2_writepage, 862 .writepage = ext2_writepage,
863 .sync_page = block_sync_page,
864 .write_begin = ext2_write_begin, 863 .write_begin = ext2_write_begin,
865 .write_end = ext2_write_end, 864 .write_end = ext2_write_end,
866 .bmap = ext2_bmap, 865 .bmap = ext2_bmap,
@@ -880,7 +879,6 @@ const struct address_space_operations ext2_nobh_aops = {
880 .readpage = ext2_readpage, 879 .readpage = ext2_readpage,
881 .readpages = ext2_readpages, 880 .readpages = ext2_readpages,
882 .writepage = ext2_nobh_writepage, 881 .writepage = ext2_nobh_writepage,
883 .sync_page = block_sync_page,
884 .write_begin = ext2_nobh_write_begin, 882 .write_begin = ext2_nobh_write_begin,
885 .write_end = nobh_write_end, 883 .write_end = nobh_write_end,
886 .bmap = ext2_bmap, 884 .bmap = ext2_bmap,
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index e7431309bdca..f81e250ac5c4 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -39,7 +39,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
39 if (ret) 39 if (ret)
40 return ret; 40 return ret;
41 41
42 if (!is_owner_or_cap(inode)) { 42 if (!inode_owner_or_capable(inode)) {
43 ret = -EACCES; 43 ret = -EACCES;
44 goto setflags_out; 44 goto setflags_out;
45 } 45 }
@@ -89,7 +89,7 @@ setflags_out:
89 case EXT2_IOC_GETVERSION: 89 case EXT2_IOC_GETVERSION:
90 return put_user(inode->i_generation, (int __user *) arg); 90 return put_user(inode->i_generation, (int __user *) arg);
91 case EXT2_IOC_SETVERSION: 91 case EXT2_IOC_SETVERSION:
92 if (!is_owner_or_cap(inode)) 92 if (!inode_owner_or_capable(inode))
93 return -EPERM; 93 return -EPERM;
94 ret = mnt_want_write(filp->f_path.mnt); 94 ret = mnt_want_write(filp->f_path.mnt);
95 if (ret) 95 if (ret)
@@ -115,7 +115,7 @@ setflags_out:
115 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 115 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
116 return -ENOTTY; 116 return -ENOTTY;
117 117
118 if (!is_owner_or_cap(inode)) 118 if (!inode_owner_or_capable(inode))
119 return -EACCES; 119 return -EACCES;
120 120
121 if (get_user(rsv_window_size, (int __user *)arg)) 121 if (get_user(rsv_window_size, (int __user *)arg))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e4fa49e6c539..9d021c0d472a 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -435,7 +435,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
435 return -EINVAL; 435 return -EINVAL;
436 if (!test_opt(inode->i_sb, POSIX_ACL)) 436 if (!test_opt(inode->i_sb, POSIX_ACL))
437 return -EOPNOTSUPP; 437 return -EOPNOTSUPP;
438 if (!is_owner_or_cap(inode)) 438 if (!inode_owner_or_capable(inode))
439 return -EPERM; 439 return -EPERM;
440 440
441 if (value) { 441 if (value) {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ae94f6d949f5..fe2541d250e4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1894,7 +1894,6 @@ static const struct address_space_operations ext3_ordered_aops = {
1894 .readpage = ext3_readpage, 1894 .readpage = ext3_readpage,
1895 .readpages = ext3_readpages, 1895 .readpages = ext3_readpages,
1896 .writepage = ext3_ordered_writepage, 1896 .writepage = ext3_ordered_writepage,
1897 .sync_page = block_sync_page,
1898 .write_begin = ext3_write_begin, 1897 .write_begin = ext3_write_begin,
1899 .write_end = ext3_ordered_write_end, 1898 .write_end = ext3_ordered_write_end,
1900 .bmap = ext3_bmap, 1899 .bmap = ext3_bmap,
@@ -1910,7 +1909,6 @@ static const struct address_space_operations ext3_writeback_aops = {
1910 .readpage = ext3_readpage, 1909 .readpage = ext3_readpage,
1911 .readpages = ext3_readpages, 1910 .readpages = ext3_readpages,
1912 .writepage = ext3_writeback_writepage, 1911 .writepage = ext3_writeback_writepage,
1913 .sync_page = block_sync_page,
1914 .write_begin = ext3_write_begin, 1912 .write_begin = ext3_write_begin,
1915 .write_end = ext3_writeback_write_end, 1913 .write_end = ext3_writeback_write_end,
1916 .bmap = ext3_bmap, 1914 .bmap = ext3_bmap,
@@ -1926,7 +1924,6 @@ static const struct address_space_operations ext3_journalled_aops = {
1926 .readpage = ext3_readpage, 1924 .readpage = ext3_readpage,
1927 .readpages = ext3_readpages, 1925 .readpages = ext3_readpages,
1928 .writepage = ext3_journalled_writepage, 1926 .writepage = ext3_journalled_writepage,
1929 .sync_page = block_sync_page,
1930 .write_begin = ext3_write_begin, 1927 .write_begin = ext3_write_begin,
1931 .write_end = ext3_journalled_write_end, 1928 .write_end = ext3_journalled_write_end,
1932 .set_page_dirty = ext3_journalled_set_page_dirty, 1929 .set_page_dirty = ext3_journalled_set_page_dirty,
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index fc080dd561f7..f4090bd2f345 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -38,7 +38,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
38 unsigned int oldflags; 38 unsigned int oldflags;
39 unsigned int jflag; 39 unsigned int jflag;
40 40
41 if (!is_owner_or_cap(inode)) 41 if (!inode_owner_or_capable(inode))
42 return -EACCES; 42 return -EACCES;
43 43
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
@@ -123,7 +123,7 @@ flags_out:
123 __u32 generation; 123 __u32 generation;
124 int err; 124 int err;
125 125
126 if (!is_owner_or_cap(inode)) 126 if (!inode_owner_or_capable(inode))
127 return -EPERM; 127 return -EPERM;
128 128
129 err = mnt_want_write(filp->f_path.mnt); 129 err = mnt_want_write(filp->f_path.mnt);
@@ -192,7 +192,7 @@ setversion_out:
192 if (err) 192 if (err)
193 return err; 193 return err;
194 194
195 if (!is_owner_or_cap(inode)) { 195 if (!inode_owner_or_capable(inode)) {
196 err = -EACCES; 196 err = -EACCES;
197 goto setrsvsz_out; 197 goto setrsvsz_out;
198 } 198 }
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index e0270d1f8d82..21eacd7b7d79 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -433,7 +433,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
433 return -EINVAL; 433 return -EINVAL;
434 if (!test_opt(inode->i_sb, POSIX_ACL)) 434 if (!test_opt(inode->i_sb, POSIX_ACL))
435 return -EOPNOTSUPP; 435 return -EOPNOTSUPP;
436 if (!is_owner_or_cap(inode)) 436 if (!inode_owner_or_capable(inode))
437 return -EPERM; 437 return -EPERM;
438 438
439 if (value) { 439 if (value) {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index adf96b822781..97b970e7dd13 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -21,6 +21,8 @@
21#include "ext4_jbd2.h" 21#include "ext4_jbd2.h"
22#include "mballoc.h" 22#include "mballoc.h"
23 23
24#include <trace/events/ext4.h>
25
24/* 26/*
25 * balloc.c contains the blocks allocation and deallocation routines 27 * balloc.c contains the blocks allocation and deallocation routines
26 */ 28 */
@@ -342,6 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
342 * We do it here so the bitmap uptodate bit 344 * We do it here so the bitmap uptodate bit
343 * get set with buffer lock held. 345 * get set with buffer lock held.
344 */ 346 */
347 trace_ext4_read_block_bitmap_load(sb, block_group);
345 set_bitmap_uptodate(bh); 348 set_bitmap_uptodate(bh);
346 if (bh_submit_read(bh) < 0) { 349 if (bh_submit_read(bh) < 0) {
347 put_bh(bh); 350 put_bh(bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3aa0b72b3b94..4daaf2b753f4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -923,14 +923,14 @@ struct ext4_inode_info {
923#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 923#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
924 EXT4_MOUNT2_##opt) 924 EXT4_MOUNT2_##opt)
925 925
926#define ext4_set_bit ext2_set_bit 926#define ext4_set_bit __test_and_set_bit_le
927#define ext4_set_bit_atomic ext2_set_bit_atomic 927#define ext4_set_bit_atomic ext2_set_bit_atomic
928#define ext4_clear_bit ext2_clear_bit 928#define ext4_clear_bit __test_and_clear_bit_le
929#define ext4_clear_bit_atomic ext2_clear_bit_atomic 929#define ext4_clear_bit_atomic ext2_clear_bit_atomic
930#define ext4_test_bit ext2_test_bit 930#define ext4_test_bit test_bit_le
931#define ext4_find_first_zero_bit ext2_find_first_zero_bit 931#define ext4_find_first_zero_bit find_first_zero_bit_le
932#define ext4_find_next_zero_bit ext2_find_next_zero_bit 932#define ext4_find_next_zero_bit find_next_zero_bit_le
933#define ext4_find_next_bit ext2_find_next_bit 933#define ext4_find_next_bit find_next_bit_le
934 934
935/* 935/*
936 * Maximal mount counts between two filesystem checks 936 * Maximal mount counts between two filesystem checks
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d8b992e658c1..e25e99bf7ee1 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -202,13 +202,6 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
202 return 1; 202 return 1;
203} 203}
204 204
205static inline void ext4_journal_release_buffer(handle_t *handle,
206 struct buffer_head *bh)
207{
208 if (ext4_handle_valid(handle))
209 jbd2_journal_release_buffer(handle, bh);
210}
211
212static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) 205static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
213{ 206{
214 return ext4_journal_start_sb(inode->i_sb, nblocks); 207 return ext4_journal_start_sb(inode->i_sb, nblocks);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7516fb9c0bd5..dd2cb5076ff9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,8 @@
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h" 45#include "ext4_extents.h"
46 46
47#include <trace/events/ext4.h>
48
47static int ext4_ext_truncate_extend_restart(handle_t *handle, 49static int ext4_ext_truncate_extend_restart(handle_t *handle,
48 struct inode *inode, 50 struct inode *inode,
49 int needed) 51 int needed)
@@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
664 if (unlikely(!bh)) 666 if (unlikely(!bh))
665 goto err; 667 goto err;
666 if (!bh_uptodate_or_lock(bh)) { 668 if (!bh_uptodate_or_lock(bh)) {
669 trace_ext4_ext_load_extent(inode, block,
670 path[ppos].p_block);
667 if (bh_submit_read(bh) < 0) { 671 if (bh_submit_read(bh) < 0) {
668 put_bh(bh); 672 put_bh(bh);
669 goto err; 673 goto err;
@@ -1034,7 +1038,7 @@ cleanup:
1034 for (i = 0; i < depth; i++) { 1038 for (i = 0; i < depth; i++) {
1035 if (!ablocks[i]) 1039 if (!ablocks[i])
1036 continue; 1040 continue;
1037 ext4_free_blocks(handle, inode, 0, ablocks[i], 1, 1041 ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
1038 EXT4_FREE_BLOCKS_METADATA); 1042 EXT4_FREE_BLOCKS_METADATA);
1039 } 1043 }
1040 } 1044 }
@@ -2059,7 +2063,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2059 if (err) 2063 if (err)
2060 return err; 2064 return err;
2061 ext_debug("index is empty, remove it, free block %llu\n", leaf); 2065 ext_debug("index is empty, remove it, free block %llu\n", leaf);
2062 ext4_free_blocks(handle, inode, 0, leaf, 1, 2066 ext4_free_blocks(handle, inode, NULL, leaf, 1,
2063 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2067 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2064 return err; 2068 return err;
2065} 2069}
@@ -2156,7 +2160,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2156 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2160 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2157 start = ext4_ext_pblock(ex) + ee_len - num; 2161 start = ext4_ext_pblock(ex) + ee_len - num;
2158 ext_debug("free last %u blocks starting %llu\n", num, start); 2162 ext_debug("free last %u blocks starting %llu\n", num, start);
2159 ext4_free_blocks(handle, inode, 0, start, num, flags); 2163 ext4_free_blocks(handle, inode, NULL, start, num, flags);
2160 } else if (from == le32_to_cpu(ex->ee_block) 2164 } else if (from == le32_to_cpu(ex->ee_block)
2161 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2165 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2162 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2166 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -3108,14 +3112,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3108{ 3112{
3109 int i, depth; 3113 int i, depth;
3110 struct ext4_extent_header *eh; 3114 struct ext4_extent_header *eh;
3111 struct ext4_extent *ex, *last_ex; 3115 struct ext4_extent *last_ex;
3112 3116
3113 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) 3117 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
3114 return 0; 3118 return 0;
3115 3119
3116 depth = ext_depth(inode); 3120 depth = ext_depth(inode);
3117 eh = path[depth].p_hdr; 3121 eh = path[depth].p_hdr;
3118 ex = path[depth].p_ext;
3119 3122
3120 if (unlikely(!eh->eh_entries)) { 3123 if (unlikely(!eh->eh_entries)) {
3121 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " 3124 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3295,9 +3298,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3295 struct ext4_map_blocks *map, int flags) 3298 struct ext4_map_blocks *map, int flags)
3296{ 3299{
3297 struct ext4_ext_path *path = NULL; 3300 struct ext4_ext_path *path = NULL;
3298 struct ext4_extent_header *eh;
3299 struct ext4_extent newex, *ex; 3301 struct ext4_extent newex, *ex;
3300 ext4_fsblk_t newblock; 3302 ext4_fsblk_t newblock = 0;
3301 int err = 0, depth, ret; 3303 int err = 0, depth, ret;
3302 unsigned int allocated = 0; 3304 unsigned int allocated = 0;
3303 struct ext4_allocation_request ar; 3305 struct ext4_allocation_request ar;
@@ -3305,6 +3307,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3305 3307
3306 ext_debug("blocks %u/%u requested for inode %lu\n", 3308 ext_debug("blocks %u/%u requested for inode %lu\n",
3307 map->m_lblk, map->m_len, inode->i_ino); 3309 map->m_lblk, map->m_len, inode->i_ino);
3310 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3308 3311
3309 /* check in cache */ 3312 /* check in cache */
3310 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3313 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3352 err = -EIO; 3355 err = -EIO;
3353 goto out2; 3356 goto out2;
3354 } 3357 }
3355 eh = path[depth].p_hdr;
3356 3358
3357 ex = path[depth].p_ext; 3359 ex = path[depth].p_ext;
3358 if (ex) { 3360 if (ex) {
@@ -3485,7 +3487,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3485 /* not a good idea to call discard here directly, 3487 /* not a good idea to call discard here directly,
3486 * but otherwise we'd need to call it every free() */ 3488 * but otherwise we'd need to call it every free() */
3487 ext4_discard_preallocations(inode); 3489 ext4_discard_preallocations(inode);
3488 ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), 3490 ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
3489 ext4_ext_get_actual_len(&newex), 0); 3491 ext4_ext_get_actual_len(&newex), 0);
3490 goto out2; 3492 goto out2;
3491 } 3493 }
@@ -3525,6 +3527,8 @@ out2:
3525 ext4_ext_drop_refs(path); 3527 ext4_ext_drop_refs(path);
3526 kfree(path); 3528 kfree(path);
3527 } 3529 }
3530 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
3531 newblock, map->m_len, err ? err : allocated);
3528 return err ? err : allocated; 3532 return err ? err : allocated;
3529} 3533}
3530 3534
@@ -3658,6 +3662,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3658 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3662 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
3659 return -EOPNOTSUPP; 3663 return -EOPNOTSUPP;
3660 3664
3665 trace_ext4_fallocate_enter(inode, offset, len, mode);
3661 map.m_lblk = offset >> blkbits; 3666 map.m_lblk = offset >> blkbits;
3662 /* 3667 /*
3663 * We can't just convert len to max_blocks because 3668 * We can't just convert len to max_blocks because
@@ -3673,6 +3678,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3673 ret = inode_newsize_ok(inode, (len + offset)); 3678 ret = inode_newsize_ok(inode, (len + offset));
3674 if (ret) { 3679 if (ret) {
3675 mutex_unlock(&inode->i_mutex); 3680 mutex_unlock(&inode->i_mutex);
3681 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
3676 return ret; 3682 return ret;
3677 } 3683 }
3678retry: 3684retry:
@@ -3717,6 +3723,8 @@ retry:
3717 goto retry; 3723 goto retry;
3718 } 3724 }
3719 mutex_unlock(&inode->i_mutex); 3725 mutex_unlock(&inode->i_mutex);
3726 trace_ext4_fallocate_exit(inode, offset, max_blocks,
3727 ret > 0 ? ret2 : ret);
3720 return ret > 0 ? ret2 : ret; 3728 return ret > 0 ? ret2 : ret;
3721} 3729}
3722 3730
@@ -3775,6 +3783,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3775 } 3783 }
3776 return ret > 0 ? ret2 : ret; 3784 return ret > 0 ? ret2 : ret;
3777} 3785}
3786
3778/* 3787/*
3779 * Callback function called for each extent to gather FIEMAP information. 3788 * Callback function called for each extent to gather FIEMAP information.
3780 */ 3789 */
@@ -3782,38 +3791,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3782 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3791 struct ext4_ext_cache *newex, struct ext4_extent *ex,
3783 void *data) 3792 void *data)
3784{ 3793{
3785 struct fiemap_extent_info *fieinfo = data;
3786 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
3787 __u64 logical; 3794 __u64 logical;
3788 __u64 physical; 3795 __u64 physical;
3789 __u64 length; 3796 __u64 length;
3797 loff_t size;
3790 __u32 flags = 0; 3798 __u32 flags = 0;
3791 int error; 3799 int ret = 0;
3800 struct fiemap_extent_info *fieinfo = data;
3801 unsigned char blksize_bits;
3792 3802
3793 logical = (__u64)newex->ec_block << blksize_bits; 3803 blksize_bits = inode->i_sb->s_blocksize_bits;
3804 logical = (__u64)newex->ec_block << blksize_bits;
3794 3805
3795 if (newex->ec_start == 0) { 3806 if (newex->ec_start == 0) {
3796 pgoff_t offset; 3807 /*
3797 struct page *page; 3808 * No extent in extent-tree contains block @newex->ec_start,
3809 * then the block may stay in 1)a hole or 2)delayed-extent.
3810 *
3811 * Holes or delayed-extents are processed as follows.
3812 * 1. lookup dirty pages with specified range in pagecache.
3813 * If no page is got, then there is no delayed-extent and
3814 * return with EXT_CONTINUE.
3815 * 2. find the 1st mapped buffer,
3816 * 3. check if the mapped buffer is both in the request range
3817 * and a delayed buffer. If not, there is no delayed-extent,
3818 * then return.
3819 * 4. a delayed-extent is found, the extent will be collected.
3820 */
3821 ext4_lblk_t end = 0;
3822 pgoff_t last_offset;
3823 pgoff_t offset;
3824 pgoff_t index;
3825 struct page **pages = NULL;
3798 struct buffer_head *bh = NULL; 3826 struct buffer_head *bh = NULL;
3827 struct buffer_head *head = NULL;
3828 unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
3829
3830 pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
3831 if (pages == NULL)
3832 return -ENOMEM;
3799 3833
3800 offset = logical >> PAGE_SHIFT; 3834 offset = logical >> PAGE_SHIFT;
3801 page = find_get_page(inode->i_mapping, offset); 3835repeat:
3802 if (!page || !page_has_buffers(page)) 3836 last_offset = offset;
3803 return EXT_CONTINUE; 3837 head = NULL;
3838 ret = find_get_pages_tag(inode->i_mapping, &offset,
3839 PAGECACHE_TAG_DIRTY, nr_pages, pages);
3840
3841 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
3842 /* First time, try to find a mapped buffer. */
3843 if (ret == 0) {
3844out:
3845 for (index = 0; index < ret; index++)
3846 page_cache_release(pages[index]);
3847 /* just a hole. */
3848 kfree(pages);
3849 return EXT_CONTINUE;
3850 }
3804 3851
3805 bh = page_buffers(page); 3852 /* Try to find the 1st mapped buffer. */
3853 end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
3854 blksize_bits;
3855 if (!page_has_buffers(pages[0]))
3856 goto out;
3857 head = page_buffers(pages[0]);
3858 if (!head)
3859 goto out;
3806 3860
3807 if (!bh) 3861 bh = head;
3808 return EXT_CONTINUE; 3862 do {
3863 if (buffer_mapped(bh)) {
3864 /* get the 1st mapped buffer. */
3865 if (end > newex->ec_block +
3866 newex->ec_len)
3867 /* The buffer is out of
3868 * the request range.
3869 */
3870 goto out;
3871 goto found_mapped_buffer;
3872 }
3873 bh = bh->b_this_page;
3874 end++;
3875 } while (bh != head);
3809 3876
3810 if (buffer_delay(bh)) { 3877 /* No mapped buffer found. */
3811 flags |= FIEMAP_EXTENT_DELALLOC; 3878 goto out;
3812 page_cache_release(page);
3813 } else { 3879 } else {
3814 page_cache_release(page); 3880 /*Find contiguous delayed buffers. */
3815 return EXT_CONTINUE; 3881 if (ret > 0 && pages[0]->index == last_offset)
3882 head = page_buffers(pages[0]);
3883 bh = head;
3816 } 3884 }
3885
3886found_mapped_buffer:
3887 if (bh != NULL && buffer_delay(bh)) {
3888 /* 1st or contiguous delayed buffer found. */
3889 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
3890 /*
3891 * 1st delayed buffer found, record
3892 * the start of extent.
3893 */
3894 flags |= FIEMAP_EXTENT_DELALLOC;
3895 newex->ec_block = end;
3896 logical = (__u64)end << blksize_bits;
3897 }
3898 /* Find contiguous delayed buffers. */
3899 do {
3900 if (!buffer_delay(bh))
3901 goto found_delayed_extent;
3902 bh = bh->b_this_page;
3903 end++;
3904 } while (bh != head);
3905
3906 for (index = 1; index < ret; index++) {
3907 if (!page_has_buffers(pages[index])) {
3908 bh = NULL;
3909 break;
3910 }
3911 head = page_buffers(pages[index]);
3912 if (!head) {
3913 bh = NULL;
3914 break;
3915 }
3916 if (pages[index]->index !=
3917 pages[0]->index + index) {
3918 /* Blocks are not contiguous. */
3919 bh = NULL;
3920 break;
3921 }
3922 bh = head;
3923 do {
3924 if (!buffer_delay(bh))
3925 /* Delayed-extent ends. */
3926 goto found_delayed_extent;
3927 bh = bh->b_this_page;
3928 end++;
3929 } while (bh != head);
3930 }
3931 } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
3932 /* a hole found. */
3933 goto out;
3934
3935found_delayed_extent:
3936 newex->ec_len = min(end - newex->ec_block,
3937 (ext4_lblk_t)EXT_INIT_MAX_LEN);
3938 if (ret == nr_pages && bh != NULL &&
3939 newex->ec_len < EXT_INIT_MAX_LEN &&
3940 buffer_delay(bh)) {
3941 /* Have not collected an extent and continue. */
3942 for (index = 0; index < ret; index++)
3943 page_cache_release(pages[index]);
3944 goto repeat;
3945 }
3946
3947 for (index = 0; index < ret; index++)
3948 page_cache_release(pages[index]);
3949 kfree(pages);
3817 } 3950 }
3818 3951
3819 physical = (__u64)newex->ec_start << blksize_bits; 3952 physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3955,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3822 if (ex && ext4_ext_is_uninitialized(ex)) 3955 if (ex && ext4_ext_is_uninitialized(ex))
3823 flags |= FIEMAP_EXTENT_UNWRITTEN; 3956 flags |= FIEMAP_EXTENT_UNWRITTEN;
3824 3957
3825 /* 3958 size = i_size_read(inode);
3826 * If this extent reaches EXT_MAX_BLOCK, it must be last. 3959 if (logical + length >= size)
3827 *
3828 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
3829 * this also indicates no more allocated blocks.
3830 *
3831 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3832 */
3833 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
3834 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
3835 loff_t size = i_size_read(inode);
3836 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
3837
3838 flags |= FIEMAP_EXTENT_LAST; 3960 flags |= FIEMAP_EXTENT_LAST;
3839 if ((flags & FIEMAP_EXTENT_DELALLOC) &&
3840 logical+length > size)
3841 length = (size - logical + bs - 1) & ~(bs-1);
3842 }
3843 3961
3844 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3962 ret = fiemap_fill_next_extent(fieinfo, logical, physical,
3845 length, flags); 3963 length, flags);
3846 if (error < 0) 3964 if (ret < 0)
3847 return error; 3965 return ret;
3848 if (error == 1) 3966 if (ret == 1)
3849 return EXT_BREAK; 3967 return EXT_BREAK;
3850
3851 return EXT_CONTINUE; 3968 return EXT_CONTINUE;
3852} 3969}
3853 3970
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7829b287822a..7f74019d6d77 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -164,20 +164,20 @@ int ext4_sync_file(struct file *file, int datasync)
164 164
165 J_ASSERT(ext4_journal_current_handle() == NULL); 165 J_ASSERT(ext4_journal_current_handle() == NULL);
166 166
167 trace_ext4_sync_file(file, datasync); 167 trace_ext4_sync_file_enter(file, datasync);
168 168
169 if (inode->i_sb->s_flags & MS_RDONLY) 169 if (inode->i_sb->s_flags & MS_RDONLY)
170 return 0; 170 return 0;
171 171
172 ret = ext4_flush_completed_IO(inode); 172 ret = ext4_flush_completed_IO(inode);
173 if (ret < 0) 173 if (ret < 0)
174 return ret; 174 goto out;
175 175
176 if (!journal) { 176 if (!journal) {
177 ret = generic_file_fsync(file, datasync); 177 ret = generic_file_fsync(file, datasync);
178 if (!ret && !list_empty(&inode->i_dentry)) 178 if (!ret && !list_empty(&inode->i_dentry))
179 ext4_sync_parent(inode); 179 ext4_sync_parent(inode);
180 return ret; 180 goto out;
181 } 181 }
182 182
183 /* 183 /*
@@ -194,8 +194,10 @@ int ext4_sync_file(struct file *file, int datasync)
194 * (they were dirtied by commit). But that's OK - the blocks are 194 * (they were dirtied by commit). But that's OK - the blocks are
195 * safe in-journal, which is all fsync() needs to ensure. 195 * safe in-journal, which is all fsync() needs to ensure.
196 */ 196 */
197 if (ext4_should_journal_data(inode)) 197 if (ext4_should_journal_data(inode)) {
198 return ext4_force_commit(inode->i_sb); 198 ret = ext4_force_commit(inode->i_sb);
199 goto out;
200 }
199 201
200 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 202 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
201 if (jbd2_log_start_commit(journal, commit_tid)) { 203 if (jbd2_log_start_commit(journal, commit_tid)) {
@@ -215,5 +217,7 @@ int ext4_sync_file(struct file *file, int datasync)
215 ret = jbd2_log_wait_commit(journal, commit_tid); 217 ret = jbd2_log_wait_commit(journal, commit_tid);
216 } else if (journal->j_flags & JBD2_BARRIER) 218 } else if (journal->j_flags & JBD2_BARRIER)
217 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 219 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
220 out:
221 trace_ext4_sync_file_exit(inode, ret);
218 return ret; 222 return ret;
219} 223}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 78b79e1bd7ed..21bb2f61e502 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -152,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
152 * We do it here so the bitmap uptodate bit 152 * We do it here so the bitmap uptodate bit
153 * get set with buffer lock held. 153 * get set with buffer lock held.
154 */ 154 */
155 trace_ext4_load_inode_bitmap(sb, block_group);
155 set_bitmap_uptodate(bh); 156 set_bitmap_uptodate(bh);
156 if (bh_submit_read(bh) < 0) { 157 if (bh_submit_read(bh) < 0) {
157 put_bh(bh); 158 put_bh(bh);
@@ -649,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
649 *group = parent_group + flex_size; 650 *group = parent_group + flex_size;
650 if (*group > ngroups) 651 if (*group > ngroups)
651 *group = 0; 652 *group = 0;
652 return find_group_orlov(sb, parent, group, mode, 0); 653 return find_group_orlov(sb, parent, group, mode, NULL);
653 } 654 }
654 655
655 /* 656 /*
@@ -1054,6 +1055,11 @@ got:
1054 } 1055 }
1055 } 1056 }
1056 1057
1058 if (ext4_handle_valid(handle)) {
1059 ei->i_sync_tid = handle->h_transaction->t_tid;
1060 ei->i_datasync_tid = handle->h_transaction->t_tid;
1061 }
1062
1057 err = ext4_mark_inode_dirty(handle, inode); 1063 err = ext4_mark_inode_dirty(handle, inode);
1058 if (err) { 1064 if (err) {
1059 ext4_std_error(sb, err); 1065 ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f7f9e49914f..1a86282b9024 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
173 BUG_ON(EXT4_JOURNAL(inode) == NULL); 173 BUG_ON(EXT4_JOURNAL(inode) == NULL);
174 jbd_debug(2, "restarting handle %p\n", handle); 174 jbd_debug(2, "restarting handle %p\n", handle);
175 up_write(&EXT4_I(inode)->i_data_sem); 175 up_write(&EXT4_I(inode)->i_data_sem);
176 ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); 176 ret = ext4_journal_restart(handle, nblocks);
177 down_write(&EXT4_I(inode)->i_data_sem); 177 down_write(&EXT4_I(inode)->i_data_sem);
178 ext4_discard_preallocations(inode); 178 ext4_discard_preallocations(inode);
179 179
@@ -720,7 +720,7 @@ allocated:
720 return ret; 720 return ret;
721failed_out: 721failed_out:
722 for (i = 0; i < index; i++) 722 for (i = 0; i < index; i++)
723 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); 723 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
724 return ret; 724 return ret;
725} 725}
726 726
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
823 return err; 823 return err;
824failed: 824failed:
825 /* Allocation failed, free what we already allocated */ 825 /* Allocation failed, free what we already allocated */
826 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); 826 ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
827 for (i = 1; i <= n ; i++) { 827 for (i = 1; i <= n ; i++) {
828 /* 828 /*
829 * branch[i].bh is newly allocated, so there is no 829 * branch[i].bh is newly allocated, so there is no
830 * need to revoke the block, which is why we don't 830 * need to revoke the block, which is why we don't
831 * need to set EXT4_FREE_BLOCKS_METADATA. 831 * need to set EXT4_FREE_BLOCKS_METADATA.
832 */ 832 */
833 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 833 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
834 EXT4_FREE_BLOCKS_FORGET); 834 EXT4_FREE_BLOCKS_FORGET);
835 } 835 }
836 for (i = n+1; i < indirect_blks; i++) 836 for (i = n+1; i < indirect_blks; i++)
837 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); 837 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
838 838
839 ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); 839 ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
840 840
841 return err; 841 return err;
842} 842}
@@ -924,7 +924,7 @@ err_out:
924 ext4_free_blocks(handle, inode, where[i].bh, 0, 1, 924 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
925 EXT4_FREE_BLOCKS_FORGET); 925 EXT4_FREE_BLOCKS_FORGET);
926 } 926 }
927 ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), 927 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
928 blks, 0); 928 blks, 0);
929 929
930 return err; 930 return err;
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
973 int count = 0; 973 int count = 0;
974 ext4_fsblk_t first_block = 0; 974 ext4_fsblk_t first_block = 0;
975 975
976 trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
976 J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); 977 J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
977 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); 978 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
978 depth = ext4_block_to_path(inode, map->m_lblk, offsets, 979 depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@ cleanup:
1058 partial--; 1059 partial--;
1059 } 1060 }
1060out: 1061out:
1062 trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
1063 map->m_pblk, map->m_len, err);
1061 return err; 1064 return err;
1062} 1065}
1063 1066
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2060 if (nr_pages == 0) 2063 if (nr_pages == 0)
2061 break; 2064 break;
2062 for (i = 0; i < nr_pages; i++) { 2065 for (i = 0; i < nr_pages; i++) {
2063 int commit_write = 0, redirty_page = 0; 2066 int commit_write = 0, skip_page = 0;
2064 struct page *page = pvec.pages[i]; 2067 struct page *page = pvec.pages[i];
2065 2068
2066 index = page->index; 2069 index = page->index;
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2086 * If the page does not have buffers (for 2089 * If the page does not have buffers (for
2087 * whatever reason), try to create them using 2090 * whatever reason), try to create them using
2088 * __block_write_begin. If this fails, 2091 * __block_write_begin. If this fails,
2089 * redirty the page and move on. 2092 * skip the page and move on.
2090 */ 2093 */
2091 if (!page_has_buffers(page)) { 2094 if (!page_has_buffers(page)) {
2092 if (__block_write_begin(page, 0, len, 2095 if (__block_write_begin(page, 0, len,
2093 noalloc_get_block_write)) { 2096 noalloc_get_block_write)) {
2094 redirty_page: 2097 skip_page:
2095 redirty_page_for_writepage(mpd->wbc,
2096 page);
2097 unlock_page(page); 2098 unlock_page(page);
2098 continue; 2099 continue;
2099 } 2100 }
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2104 block_start = 0; 2105 block_start = 0;
2105 do { 2106 do {
2106 if (!bh) 2107 if (!bh)
2107 goto redirty_page; 2108 goto skip_page;
2108 if (map && (cur_logical >= map->m_lblk) && 2109 if (map && (cur_logical >= map->m_lblk) &&
2109 (cur_logical <= (map->m_lblk + 2110 (cur_logical <= (map->m_lblk +
2110 (map->m_len - 1)))) { 2111 (map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2120 clear_buffer_unwritten(bh); 2121 clear_buffer_unwritten(bh);
2121 } 2122 }
2122 2123
2123 /* redirty page if block allocation undone */ 2124 /* skip page if block allocation undone */
2124 if (buffer_delay(bh) || buffer_unwritten(bh)) 2125 if (buffer_delay(bh) || buffer_unwritten(bh))
2125 redirty_page = 1; 2126 skip_page = 1;
2126 bh = bh->b_this_page; 2127 bh = bh->b_this_page;
2127 block_start += bh->b_size; 2128 block_start += bh->b_size;
2128 cur_logical++; 2129 cur_logical++;
2129 pblock++; 2130 pblock++;
2130 } while (bh != page_bufs); 2131 } while (bh != page_bufs);
2131 2132
2132 if (redirty_page) 2133 if (skip_page)
2133 goto redirty_page; 2134 goto skip_page;
2134 2135
2135 if (commit_write) 2136 if (commit_write)
2136 /* mark the buffer_heads as dirty & uptodate */ 2137 /* mark the buffer_heads as dirty & uptodate */
2137 block_commit_write(page, 0, len); 2138 block_commit_write(page, 0, len);
2138 2139
2140 clear_page_dirty_for_io(page);
2139 /* 2141 /*
2140 * Delalloc doesn't support data journalling, 2142 * Delalloc doesn't support data journalling,
2141 * but eventually maybe we'll lift this 2143 * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2165 return ret; 2167 return ret;
2166} 2168}
2167 2169
2168static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, 2170static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
2169 sector_t logical, long blk_cnt)
2170{ 2171{
2171 int nr_pages, i; 2172 int nr_pages, i;
2172 pgoff_t index, end; 2173 pgoff_t index, end;
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2174 struct inode *inode = mpd->inode; 2175 struct inode *inode = mpd->inode;
2175 struct address_space *mapping = inode->i_mapping; 2176 struct address_space *mapping = inode->i_mapping;
2176 2177
2177 index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); 2178 index = mpd->first_page;
2178 end = (logical + blk_cnt - 1) >> 2179 end = mpd->next_page - 1;
2179 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2180 while (index <= end) { 2180 while (index <= end) {
2181 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); 2181 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
2182 if (nr_pages == 0) 2182 if (nr_pages == 0)
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2279 err = blks; 2279 err = blks;
2280 /* 2280 /*
2281 * If get block returns EAGAIN or ENOSPC and there 2281 * If get block returns EAGAIN or ENOSPC and there
2282 * appears to be free blocks we will call 2282 * appears to be free blocks we will just let
2283 * ext4_writepage() for all of the pages which will 2283 * mpage_da_submit_io() unlock all of the pages.
2284 * just redirty the pages.
2285 */ 2284 */
2286 if (err == -EAGAIN) 2285 if (err == -EAGAIN)
2287 goto submit_io; 2286 goto submit_io;
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
2312 ext4_print_free_blocks(mpd->inode); 2311 ext4_print_free_blocks(mpd->inode);
2313 } 2312 }
2314 /* invalidate all the pages */ 2313 /* invalidate all the pages */
2315 ext4_da_block_invalidatepages(mpd, next, 2314 ext4_da_block_invalidatepages(mpd);
2316 mpd->b_size >> mpd->inode->i_blkbits); 2315
2316 /* Mark this page range as having been completed */
2317 mpd->io_done = 1;
2317 return; 2318 return;
2318 } 2319 }
2319 BUG_ON(blks == 0); 2320 BUG_ON(blks == 0);
@@ -2438,102 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
2438} 2439}
2439 2440
2440/* 2441/*
2441 * __mpage_da_writepage - finds extent of pages and blocks
2442 *
2443 * @page: page to consider
2444 * @wbc: not used, we just follow rules
2445 * @data: context
2446 *
2447 * The function finds extents of pages and scan them for all blocks.
2448 */
2449static int __mpage_da_writepage(struct page *page,
2450 struct writeback_control *wbc,
2451 struct mpage_da_data *mpd)
2452{
2453 struct inode *inode = mpd->inode;
2454 struct buffer_head *bh, *head;
2455 sector_t logical;
2456
2457 /*
2458 * Can we merge this page to current extent?
2459 */
2460 if (mpd->next_page != page->index) {
2461 /*
2462 * Nope, we can't. So, we map non-allocated blocks
2463 * and start IO on them
2464 */
2465 if (mpd->next_page != mpd->first_page) {
2466 mpage_da_map_and_submit(mpd);
2467 /*
2468 * skip rest of the page in the page_vec
2469 */
2470 redirty_page_for_writepage(wbc, page);
2471 unlock_page(page);
2472 return MPAGE_DA_EXTENT_TAIL;
2473 }
2474
2475 /*
2476 * Start next extent of pages ...
2477 */
2478 mpd->first_page = page->index;
2479
2480 /*
2481 * ... and blocks
2482 */
2483 mpd->b_size = 0;
2484 mpd->b_state = 0;
2485 mpd->b_blocknr = 0;
2486 }
2487
2488 mpd->next_page = page->index + 1;
2489 logical = (sector_t) page->index <<
2490 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2491
2492 if (!page_has_buffers(page)) {
2493 mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
2494 (1 << BH_Dirty) | (1 << BH_Uptodate));
2495 if (mpd->io_done)
2496 return MPAGE_DA_EXTENT_TAIL;
2497 } else {
2498 /*
2499 * Page with regular buffer heads, just add all dirty ones
2500 */
2501 head = page_buffers(page);
2502 bh = head;
2503 do {
2504 BUG_ON(buffer_locked(bh));
2505 /*
2506 * We need to try to allocate
2507 * unmapped blocks in the same page.
2508 * Otherwise we won't make progress
2509 * with the page in ext4_writepage
2510 */
2511 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2512 mpage_add_bh_to_extent(mpd, logical,
2513 bh->b_size,
2514 bh->b_state);
2515 if (mpd->io_done)
2516 return MPAGE_DA_EXTENT_TAIL;
2517 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2518 /*
2519 * mapped dirty buffer. We need to update
2520 * the b_state because we look at
2521 * b_state in mpage_da_map_blocks. We don't
2522 * update b_size because if we find an
2523 * unmapped buffer_head later we need to
2524 * use the b_state flag of that buffer_head.
2525 */
2526 if (mpd->b_size == 0)
2527 mpd->b_state = bh->b_state & BH_FLAGS;
2528 }
2529 logical++;
2530 } while ((bh = bh->b_this_page) != head);
2531 }
2532
2533 return 0;
2534}
2535
2536/*
2537 * This is a special get_blocks_t callback which is used by 2442 * This is a special get_blocks_t callback which is used by
2538 * ext4_da_write_begin(). It will either return mapped block or 2443 * ext4_da_write_begin(). It will either return mapped block or
2539 * reserve space for a single block. 2444 * reserve space for a single block.
@@ -2597,7 +2502,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2597 * for partial write. 2502 * for partial write.
2598 */ 2503 */
2599 set_buffer_new(bh); 2504 set_buffer_new(bh);
2600 set_buffer_mapped(bh);
2601 } 2505 }
2602 return 0; 2506 return 0;
2603} 2507}
@@ -2811,27 +2715,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2811 2715
2812/* 2716/*
2813 * write_cache_pages_da - walk the list of dirty pages of the given 2717 * write_cache_pages_da - walk the list of dirty pages of the given
2814 * address space and call the callback function (which usually writes 2718 * address space and accumulate pages that need writing, and call
2815 * the pages). 2719 * mpage_da_map_and_submit to map a single contiguous memory region
2816 * 2720 * and then write them.
2817 * This is a forked version of write_cache_pages(). Differences:
2818 * Range cyclic is ignored.
2819 * no_nrwrite_index_update is always presumed true
2820 */ 2721 */
2821static int write_cache_pages_da(struct address_space *mapping, 2722static int write_cache_pages_da(struct address_space *mapping,
2822 struct writeback_control *wbc, 2723 struct writeback_control *wbc,
2823 struct mpage_da_data *mpd, 2724 struct mpage_da_data *mpd,
2824 pgoff_t *done_index) 2725 pgoff_t *done_index)
2825{ 2726{
2826 int ret = 0; 2727 struct buffer_head *bh, *head;
2827 int done = 0; 2728 struct inode *inode = mapping->host;
2828 struct pagevec pvec; 2729 struct pagevec pvec;
2829 unsigned nr_pages; 2730 unsigned int nr_pages;
2830 pgoff_t index; 2731 sector_t logical;
2831 pgoff_t end; /* Inclusive */ 2732 pgoff_t index, end;
2832 long nr_to_write = wbc->nr_to_write; 2733 long nr_to_write = wbc->nr_to_write;
2833 int tag; 2734 int i, tag, ret = 0;
2834 2735
2736 memset(mpd, 0, sizeof(struct mpage_da_data));
2737 mpd->wbc = wbc;
2738 mpd->inode = inode;
2835 pagevec_init(&pvec, 0); 2739 pagevec_init(&pvec, 0);
2836 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2740 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2837 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2741 end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2746,11 @@ static int write_cache_pages_da(struct address_space *mapping,
2842 tag = PAGECACHE_TAG_DIRTY; 2746 tag = PAGECACHE_TAG_DIRTY;
2843 2747
2844 *done_index = index; 2748 *done_index = index;
2845 while (!done && (index <= end)) { 2749 while (index <= end) {
2846 int i;
2847
2848 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, 2750 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2849 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 2751 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2850 if (nr_pages == 0) 2752 if (nr_pages == 0)
2851 break; 2753 return 0;
2852 2754
2853 for (i = 0; i < nr_pages; i++) { 2755 for (i = 0; i < nr_pages; i++) {
2854 struct page *page = pvec.pages[i]; 2756 struct page *page = pvec.pages[i];
@@ -2860,60 +2762,100 @@ static int write_cache_pages_da(struct address_space *mapping,
2860 * mapping. However, page->index will not change 2762 * mapping. However, page->index will not change
2861 * because we have a reference on the page. 2763 * because we have a reference on the page.
2862 */ 2764 */
2863 if (page->index > end) { 2765 if (page->index > end)
2864 done = 1; 2766 goto out;
2865 break;
2866 }
2867 2767
2868 *done_index = page->index + 1; 2768 *done_index = page->index + 1;
2869 2769
2770 /*
2771 * If we can't merge this page, and we have
2772 * accumulated an contiguous region, write it
2773 */
2774 if ((mpd->next_page != page->index) &&
2775 (mpd->next_page != mpd->first_page)) {
2776 mpage_da_map_and_submit(mpd);
2777 goto ret_extent_tail;
2778 }
2779
2870 lock_page(page); 2780 lock_page(page);
2871 2781
2872 /* 2782 /*
2873 * Page truncated or invalidated. We can freely skip it 2783 * If the page is no longer dirty, or its
2874 * then, even for data integrity operations: the page 2784 * mapping no longer corresponds to inode we
2875 * has disappeared concurrently, so there could be no 2785 * are writing (which means it has been
2876 * real expectation of this data interity operation 2786 * truncated or invalidated), or the page is
2877 * even if there is now a new, dirty page at the same 2787 * already under writeback and we are not
2878 * pagecache address. 2788 * doing a data integrity writeback, skip the page
2879 */ 2789 */
2880 if (unlikely(page->mapping != mapping)) { 2790 if (!PageDirty(page) ||
2881continue_unlock: 2791 (PageWriteback(page) &&
2792 (wbc->sync_mode == WB_SYNC_NONE)) ||
2793 unlikely(page->mapping != mapping)) {
2882 unlock_page(page); 2794 unlock_page(page);
2883 continue; 2795 continue;
2884 } 2796 }
2885 2797
2886 if (!PageDirty(page)) { 2798 if (PageWriteback(page))
2887 /* someone wrote it for us */ 2799 wait_on_page_writeback(page);
2888 goto continue_unlock;
2889 }
2890
2891 if (PageWriteback(page)) {
2892 if (wbc->sync_mode != WB_SYNC_NONE)
2893 wait_on_page_writeback(page);
2894 else
2895 goto continue_unlock;
2896 }
2897 2800
2898 BUG_ON(PageWriteback(page)); 2801 BUG_ON(PageWriteback(page));
2899 if (!clear_page_dirty_for_io(page))
2900 goto continue_unlock;
2901 2802
2902 ret = __mpage_da_writepage(page, wbc, mpd); 2803 if (mpd->next_page != page->index)
2903 if (unlikely(ret)) { 2804 mpd->first_page = page->index;
2904 if (ret == AOP_WRITEPAGE_ACTIVATE) { 2805 mpd->next_page = page->index + 1;
2905 unlock_page(page); 2806 logical = (sector_t) page->index <<
2906 ret = 0; 2807 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2907 } else { 2808
2908 done = 1; 2809 if (!page_has_buffers(page)) {
2909 break; 2810 mpage_add_bh_to_extent(mpd, logical,
2910 } 2811 PAGE_CACHE_SIZE,
2812 (1 << BH_Dirty) | (1 << BH_Uptodate));
2813 if (mpd->io_done)
2814 goto ret_extent_tail;
2815 } else {
2816 /*
2817 * Page with regular buffer heads,
2818 * just add all dirty ones
2819 */
2820 head = page_buffers(page);
2821 bh = head;
2822 do {
2823 BUG_ON(buffer_locked(bh));
2824 /*
2825 * We need to try to allocate
2826 * unmapped blocks in the same page.
2827 * Otherwise we won't make progress
2828 * with the page in ext4_writepage
2829 */
2830 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2831 mpage_add_bh_to_extent(mpd, logical,
2832 bh->b_size,
2833 bh->b_state);
2834 if (mpd->io_done)
2835 goto ret_extent_tail;
2836 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2837 /*
2838 * mapped dirty buffer. We need
2839 * to update the b_state
2840 * because we look at b_state
2841 * in mpage_da_map_blocks. We
2842 * don't update b_size because
2843 * if we find an unmapped
2844 * buffer_head later we need to
2845 * use the b_state flag of that
2846 * buffer_head.
2847 */
2848 if (mpd->b_size == 0)
2849 mpd->b_state = bh->b_state & BH_FLAGS;
2850 }
2851 logical++;
2852 } while ((bh = bh->b_this_page) != head);
2911 } 2853 }
2912 2854
2913 if (nr_to_write > 0) { 2855 if (nr_to_write > 0) {
2914 nr_to_write--; 2856 nr_to_write--;
2915 if (nr_to_write == 0 && 2857 if (nr_to_write == 0 &&
2916 wbc->sync_mode == WB_SYNC_NONE) { 2858 wbc->sync_mode == WB_SYNC_NONE)
2917 /* 2859 /*
2918 * We stop writing back only if we are 2860 * We stop writing back only if we are
2919 * not doing integrity sync. In case of 2861 * not doing integrity sync. In case of
@@ -2924,14 +2866,18 @@ continue_unlock:
2924 * pages, but have not synced all of the 2866 * pages, but have not synced all of the
2925 * old dirty pages. 2867 * old dirty pages.
2926 */ 2868 */
2927 done = 1; 2869 goto out;
2928 break;
2929 }
2930 } 2870 }
2931 } 2871 }
2932 pagevec_release(&pvec); 2872 pagevec_release(&pvec);
2933 cond_resched(); 2873 cond_resched();
2934 } 2874 }
2875 return 0;
2876ret_extent_tail:
2877 ret = MPAGE_DA_EXTENT_TAIL;
2878out:
2879 pagevec_release(&pvec);
2880 cond_resched();
2935 return ret; 2881 return ret;
2936} 2882}
2937 2883
@@ -2945,7 +2891,6 @@ static int ext4_da_writepages(struct address_space *mapping,
2945 struct mpage_da_data mpd; 2891 struct mpage_da_data mpd;
2946 struct inode *inode = mapping->host; 2892 struct inode *inode = mapping->host;
2947 int pages_written = 0; 2893 int pages_written = 0;
2948 long pages_skipped;
2949 unsigned int max_pages; 2894 unsigned int max_pages;
2950 int range_cyclic, cycled = 1, io_done = 0; 2895 int range_cyclic, cycled = 1, io_done = 0;
2951 int needed_blocks, ret = 0; 2896 int needed_blocks, ret = 0;
@@ -3028,11 +2973,6 @@ static int ext4_da_writepages(struct address_space *mapping,
3028 wbc->nr_to_write = desired_nr_to_write; 2973 wbc->nr_to_write = desired_nr_to_write;
3029 } 2974 }
3030 2975
3031 mpd.wbc = wbc;
3032 mpd.inode = mapping->host;
3033
3034 pages_skipped = wbc->pages_skipped;
3035
3036retry: 2976retry:
3037 if (wbc->sync_mode == WB_SYNC_ALL) 2977 if (wbc->sync_mode == WB_SYNC_ALL)
3038 tag_pages_for_writeback(mapping, index, end); 2978 tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +2999,10 @@ retry:
3059 } 2999 }
3060 3000
3061 /* 3001 /*
3062 * Now call __mpage_da_writepage to find the next 3002 * Now call write_cache_pages_da() to find the next
3063 * contiguous region of logical blocks that need 3003 * contiguous region of logical blocks that need
3064 * blocks to be allocated by ext4. We don't actually 3004 * blocks to be allocated by ext4 and submit them.
3065 * submit the blocks for I/O here, even though
3066 * write_cache_pages thinks it will, and will set the
3067 * pages as clean for write before calling
3068 * __mpage_da_writepage().
3069 */ 3005 */
3070 mpd.b_size = 0;
3071 mpd.b_state = 0;
3072 mpd.b_blocknr = 0;
3073 mpd.first_page = 0;
3074 mpd.next_page = 0;
3075 mpd.io_done = 0;
3076 mpd.pages_written = 0;
3077 mpd.retval = 0;
3078 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); 3006 ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
3079 /* 3007 /*
3080 * If we have a contiguous extent of pages and we 3008 * If we have a contiguous extent of pages and we
@@ -3096,7 +3024,6 @@ retry:
3096 * and try again 3024 * and try again
3097 */ 3025 */
3098 jbd2_journal_force_commit_nested(sbi->s_journal); 3026 jbd2_journal_force_commit_nested(sbi->s_journal);
3099 wbc->pages_skipped = pages_skipped;
3100 ret = 0; 3027 ret = 0;
3101 } else if (ret == MPAGE_DA_EXTENT_TAIL) { 3028 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
3102 /* 3029 /*
@@ -3104,7 +3031,6 @@ retry:
3104 * rest of the pages 3031 * rest of the pages
3105 */ 3032 */
3106 pages_written += mpd.pages_written; 3033 pages_written += mpd.pages_written;
3107 wbc->pages_skipped = pages_skipped;
3108 ret = 0; 3034 ret = 0;
3109 io_done = 1; 3035 io_done = 1;
3110 } else if (wbc->nr_to_write) 3036 } else if (wbc->nr_to_write)
@@ -3122,11 +3048,6 @@ retry:
3122 wbc->range_end = mapping->writeback_index - 1; 3048 wbc->range_end = mapping->writeback_index - 1;
3123 goto retry; 3049 goto retry;
3124 } 3050 }
3125 if (pages_skipped != wbc->pages_skipped)
3126 ext4_msg(inode->i_sb, KERN_CRIT,
3127 "This should not happen leaving %s "
3128 "with nr_to_write = %ld ret = %d",
3129 __func__, wbc->nr_to_write, ret);
3130 3051
3131 /* Update index */ 3052 /* Update index */
3132 wbc->range_cyclic = range_cyclic; 3053 wbc->range_cyclic = range_cyclic;
@@ -3460,6 +3381,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3460 3381
3461static int ext4_readpage(struct file *file, struct page *page) 3382static int ext4_readpage(struct file *file, struct page *page)
3462{ 3383{
3384 trace_ext4_readpage(page);
3463 return mpage_readpage(page, ext4_get_block); 3385 return mpage_readpage(page, ext4_get_block);
3464} 3386}
3465 3387
@@ -3494,6 +3416,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
3494{ 3416{
3495 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3417 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3496 3418
3419 trace_ext4_invalidatepage(page, offset);
3420
3497 /* 3421 /*
3498 * free any io_end structure allocated for buffers to be discarded 3422 * free any io_end structure allocated for buffers to be discarded
3499 */ 3423 */
@@ -3515,6 +3439,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3515{ 3439{
3516 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3440 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3517 3441
3442 trace_ext4_releasepage(page);
3443
3518 WARN_ON(PageChecked(page)); 3444 WARN_ON(PageChecked(page));
3519 if (!page_has_buffers(page)) 3445 if (!page_has_buffers(page))
3520 return 0; 3446 return 0;
@@ -3873,11 +3799,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3873{ 3799{
3874 struct file *file = iocb->ki_filp; 3800 struct file *file = iocb->ki_filp;
3875 struct inode *inode = file->f_mapping->host; 3801 struct inode *inode = file->f_mapping->host;
3802 ssize_t ret;
3876 3803
3804 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
3877 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3805 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3878 return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 3806 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
3879 3807 else
3880 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); 3808 ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3809 trace_ext4_direct_IO_exit(inode, offset,
3810 iov_length(iov, nr_segs), rw, ret);
3811 return ret;
3881} 3812}
3882 3813
3883/* 3814/*
@@ -3903,7 +3834,6 @@ static const struct address_space_operations ext4_ordered_aops = {
3903 .readpage = ext4_readpage, 3834 .readpage = ext4_readpage,
3904 .readpages = ext4_readpages, 3835 .readpages = ext4_readpages,
3905 .writepage = ext4_writepage, 3836 .writepage = ext4_writepage,
3906 .sync_page = block_sync_page,
3907 .write_begin = ext4_write_begin, 3837 .write_begin = ext4_write_begin,
3908 .write_end = ext4_ordered_write_end, 3838 .write_end = ext4_ordered_write_end,
3909 .bmap = ext4_bmap, 3839 .bmap = ext4_bmap,
@@ -3919,7 +3849,6 @@ static const struct address_space_operations ext4_writeback_aops = {
3919 .readpage = ext4_readpage, 3849 .readpage = ext4_readpage,
3920 .readpages = ext4_readpages, 3850 .readpages = ext4_readpages,
3921 .writepage = ext4_writepage, 3851 .writepage = ext4_writepage,
3922 .sync_page = block_sync_page,
3923 .write_begin = ext4_write_begin, 3852 .write_begin = ext4_write_begin,
3924 .write_end = ext4_writeback_write_end, 3853 .write_end = ext4_writeback_write_end,
3925 .bmap = ext4_bmap, 3854 .bmap = ext4_bmap,
@@ -3935,7 +3864,6 @@ static const struct address_space_operations ext4_journalled_aops = {
3935 .readpage = ext4_readpage, 3864 .readpage = ext4_readpage,
3936 .readpages = ext4_readpages, 3865 .readpages = ext4_readpages,
3937 .writepage = ext4_writepage, 3866 .writepage = ext4_writepage,
3938 .sync_page = block_sync_page,
3939 .write_begin = ext4_write_begin, 3867 .write_begin = ext4_write_begin,
3940 .write_end = ext4_journalled_write_end, 3868 .write_end = ext4_journalled_write_end,
3941 .set_page_dirty = ext4_journalled_set_page_dirty, 3869 .set_page_dirty = ext4_journalled_set_page_dirty,
@@ -3951,7 +3879,6 @@ static const struct address_space_operations ext4_da_aops = {
3951 .readpages = ext4_readpages, 3879 .readpages = ext4_readpages,
3952 .writepage = ext4_writepage, 3880 .writepage = ext4_writepage,
3953 .writepages = ext4_da_writepages, 3881 .writepages = ext4_da_writepages,
3954 .sync_page = block_sync_page,
3955 .write_begin = ext4_da_write_begin, 3882 .write_begin = ext4_da_write_begin,
3956 .write_end = ext4_da_write_end, 3883 .write_end = ext4_da_write_end,
3957 .bmap = ext4_bmap, 3884 .bmap = ext4_bmap,
@@ -4177,6 +4104,9 @@ no_top:
4177 * 4104 *
4178 * We release `count' blocks on disk, but (last - first) may be greater 4105 * We release `count' blocks on disk, but (last - first) may be greater
4179 * than `count' because there can be holes in there. 4106 * than `count' because there can be holes in there.
4107 *
4108 * Return 0 on success, 1 on invalid block range
4109 * and < 0 on fatal error.
4180 */ 4110 */
4181static int ext4_clear_blocks(handle_t *handle, struct inode *inode, 4111static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4182 struct buffer_head *bh, 4112 struct buffer_head *bh,
@@ -4203,33 +4133,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4203 if (bh) { 4133 if (bh) {
4204 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4134 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4205 err = ext4_handle_dirty_metadata(handle, inode, bh); 4135 err = ext4_handle_dirty_metadata(handle, inode, bh);
4206 if (unlikely(err)) { 4136 if (unlikely(err))
4207 ext4_std_error(inode->i_sb, err); 4137 goto out_err;
4208 return 1;
4209 }
4210 } 4138 }
4211 err = ext4_mark_inode_dirty(handle, inode); 4139 err = ext4_mark_inode_dirty(handle, inode);
4212 if (unlikely(err)) { 4140 if (unlikely(err))
4213 ext4_std_error(inode->i_sb, err); 4141 goto out_err;
4214 return 1;
4215 }
4216 err = ext4_truncate_restart_trans(handle, inode, 4142 err = ext4_truncate_restart_trans(handle, inode,
4217 blocks_for_truncate(inode)); 4143 blocks_for_truncate(inode));
4218 if (unlikely(err)) { 4144 if (unlikely(err))
4219 ext4_std_error(inode->i_sb, err); 4145 goto out_err;
4220 return 1;
4221 }
4222 if (bh) { 4146 if (bh) {
4223 BUFFER_TRACE(bh, "retaking write access"); 4147 BUFFER_TRACE(bh, "retaking write access");
4224 ext4_journal_get_write_access(handle, bh); 4148 err = ext4_journal_get_write_access(handle, bh);
4149 if (unlikely(err))
4150 goto out_err;
4225 } 4151 }
4226 } 4152 }
4227 4153
4228 for (p = first; p < last; p++) 4154 for (p = first; p < last; p++)
4229 *p = 0; 4155 *p = 0;
4230 4156
4231 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); 4157 ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
4232 return 0; 4158 return 0;
4159out_err:
4160 ext4_std_error(inode->i_sb, err);
4161 return err;
4233} 4162}
4234 4163
4235/** 4164/**
@@ -4263,7 +4192,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4263 ext4_fsblk_t nr; /* Current block # */ 4192 ext4_fsblk_t nr; /* Current block # */
4264 __le32 *p; /* Pointer into inode/ind 4193 __le32 *p; /* Pointer into inode/ind
4265 for current block */ 4194 for current block */
4266 int err; 4195 int err = 0;
4267 4196
4268 if (this_bh) { /* For indirect block */ 4197 if (this_bh) { /* For indirect block */
4269 BUFFER_TRACE(this_bh, "get_write_access"); 4198 BUFFER_TRACE(this_bh, "get_write_access");
@@ -4285,9 +4214,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4285 } else if (nr == block_to_free + count) { 4214 } else if (nr == block_to_free + count) {
4286 count++; 4215 count++;
4287 } else { 4216 } else {
4288 if (ext4_clear_blocks(handle, inode, this_bh, 4217 err = ext4_clear_blocks(handle, inode, this_bh,
4289 block_to_free, count, 4218 block_to_free, count,
4290 block_to_free_p, p)) 4219 block_to_free_p, p);
4220 if (err)
4291 break; 4221 break;
4292 block_to_free = nr; 4222 block_to_free = nr;
4293 block_to_free_p = p; 4223 block_to_free_p = p;
@@ -4296,9 +4226,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4296 } 4226 }
4297 } 4227 }
4298 4228
4299 if (count > 0) 4229 if (!err && count > 0)
4300 ext4_clear_blocks(handle, inode, this_bh, block_to_free, 4230 err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
4301 count, block_to_free_p, p); 4231 count, block_to_free_p, p);
4232 if (err < 0)
4233 /* fatal error */
4234 return;
4302 4235
4303 if (this_bh) { 4236 if (this_bh) {
4304 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); 4237 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4416,7 +4349,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4416 * transaction where the data blocks are 4349 * transaction where the data blocks are
4417 * actually freed. 4350 * actually freed.
4418 */ 4351 */
4419 ext4_free_blocks(handle, inode, 0, nr, 1, 4352 ext4_free_blocks(handle, inode, NULL, nr, 1,
4420 EXT4_FREE_BLOCKS_METADATA| 4353 EXT4_FREE_BLOCKS_METADATA|
4421 EXT4_FREE_BLOCKS_FORGET); 4354 EXT4_FREE_BLOCKS_FORGET);
4422 4355
@@ -4500,6 +4433,8 @@ void ext4_truncate(struct inode *inode)
4500 ext4_lblk_t last_block; 4433 ext4_lblk_t last_block;
4501 unsigned blocksize = inode->i_sb->s_blocksize; 4434 unsigned blocksize = inode->i_sb->s_blocksize;
4502 4435
4436 trace_ext4_truncate_enter(inode);
4437
4503 if (!ext4_can_truncate(inode)) 4438 if (!ext4_can_truncate(inode))
4504 return; 4439 return;
4505 4440
@@ -4510,6 +4445,7 @@ void ext4_truncate(struct inode *inode)
4510 4445
4511 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 4446 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
4512 ext4_ext_truncate(inode); 4447 ext4_ext_truncate(inode);
4448 trace_ext4_truncate_exit(inode);
4513 return; 4449 return;
4514 } 4450 }
4515 4451
@@ -4639,6 +4575,7 @@ out_stop:
4639 ext4_orphan_del(handle, inode); 4575 ext4_orphan_del(handle, inode);
4640 4576
4641 ext4_journal_stop(handle); 4577 ext4_journal_stop(handle);
4578 trace_ext4_truncate_exit(inode);
4642} 4579}
4643 4580
4644/* 4581/*
@@ -4770,6 +4707,7 @@ make_io:
4770 * has in-inode xattrs, or we don't have this inode in memory. 4707 * has in-inode xattrs, or we don't have this inode in memory.
4771 * Read the block from disk. 4708 * Read the block from disk.
4772 */ 4709 */
4710 trace_ext4_load_inode(inode);
4773 get_bh(bh); 4711 get_bh(bh);
4774 bh->b_end_io = end_buffer_read_sync; 4712 bh->b_end_io = end_buffer_read_sync;
4775 submit_bh(READ_META, bh); 4713 submit_bh(READ_META, bh);
@@ -4875,7 +4813,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4875 return inode; 4813 return inode;
4876 4814
4877 ei = EXT4_I(inode); 4815 ei = EXT4_I(inode);
4878 iloc.bh = 0; 4816 iloc.bh = NULL;
4879 4817
4880 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4818 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4881 if (ret < 0) 4819 if (ret < 0)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index eb3bc2fe647e..808c554e773f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
38 unsigned int oldflags; 38 unsigned int oldflags;
39 unsigned int jflag; 39 unsigned int jflag;
40 40
41 if (!is_owner_or_cap(inode)) 41 if (!inode_owner_or_capable(inode))
42 return -EACCES; 42 return -EACCES;
43 43
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
@@ -146,7 +146,7 @@ flags_out:
146 __u32 generation; 146 __u32 generation;
147 int err; 147 int err;
148 148
149 if (!is_owner_or_cap(inode)) 149 if (!inode_owner_or_capable(inode))
150 return -EPERM; 150 return -EPERM;
151 151
152 err = mnt_want_write(filp->f_path.mnt); 152 err = mnt_want_write(filp->f_path.mnt);
@@ -298,7 +298,7 @@ mext_out:
298 case EXT4_IOC_MIGRATE: 298 case EXT4_IOC_MIGRATE:
299 { 299 {
300 int err; 300 int err;
301 if (!is_owner_or_cap(inode)) 301 if (!inode_owner_or_capable(inode))
302 return -EACCES; 302 return -EACCES;
303 303
304 err = mnt_want_write(filp->f_path.mnt); 304 err = mnt_want_write(filp->f_path.mnt);
@@ -320,7 +320,7 @@ mext_out:
320 case EXT4_IOC_ALLOC_DA_BLKS: 320 case EXT4_IOC_ALLOC_DA_BLKS:
321 { 321 {
322 int err; 322 int err;
323 if (!is_owner_or_cap(inode)) 323 if (!inode_owner_or_capable(inode))
324 return -EACCES; 324 return -EACCES;
325 325
326 err = mnt_want_write(filp->f_path.mnt); 326 err = mnt_want_write(filp->f_path.mnt);
@@ -334,16 +334,22 @@ mext_out:
334 case FITRIM: 334 case FITRIM:
335 { 335 {
336 struct super_block *sb = inode->i_sb; 336 struct super_block *sb = inode->i_sb;
337 struct request_queue *q = bdev_get_queue(sb->s_bdev);
337 struct fstrim_range range; 338 struct fstrim_range range;
338 int ret = 0; 339 int ret = 0;
339 340
340 if (!capable(CAP_SYS_ADMIN)) 341 if (!capable(CAP_SYS_ADMIN))
341 return -EPERM; 342 return -EPERM;
342 343
344 if (!blk_queue_discard(q))
345 return -EOPNOTSUPP;
346
343 if (copy_from_user(&range, (struct fstrim_range *)arg, 347 if (copy_from_user(&range, (struct fstrim_range *)arg,
344 sizeof(range))) 348 sizeof(range)))
345 return -EFAULT; 349 return -EFAULT;
346 350
351 range.minlen = max((unsigned int)range.minlen,
352 q->limits.discard_granularity);
347 ret = ext4_trim_fs(sb, &range); 353 ret = ext4_trim_fs(sb, &range);
348 if (ret < 0) 354 if (ret < 0)
349 return ret; 355 return ret;
@@ -421,6 +427,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
421 return err; 427 return err;
422 } 428 }
423 case EXT4_IOC_MOVE_EXT: 429 case EXT4_IOC_MOVE_EXT:
430 case FITRIM:
424 break; 431 break;
425 default: 432 default:
426 return -ENOIOCTLCMD; 433 return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d1fe09aea73d..a5837a837a8b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -432,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
432 } 432 }
433 433
434 /* at order 0 we see each particular block */ 434 /* at order 0 we see each particular block */
435 *max = 1 << (e4b->bd_blkbits + 3); 435 if (order == 0) {
436 if (order == 0) 436 *max = 1 << (e4b->bd_blkbits + 3);
437 return EXT4_MB_BITMAP(e4b); 437 return EXT4_MB_BITMAP(e4b);
438 }
438 439
439 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 440 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
440 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; 441 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
@@ -616,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
616 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); 617 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
617 618
618 grp = ext4_get_group_info(sb, e4b->bd_group); 619 grp = ext4_get_group_info(sb, e4b->bd_group);
619 buddy = mb_find_buddy(e4b, 0, &max);
620 list_for_each(cur, &grp->bb_prealloc_list) { 620 list_for_each(cur, &grp->bb_prealloc_list) {
621 ext4_group_t groupnr; 621 ext4_group_t groupnr;
622 struct ext4_prealloc_space *pa; 622 struct ext4_prealloc_space *pa;
@@ -635,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
635#define mb_check_buddy(e4b) 635#define mb_check_buddy(e4b)
636#endif 636#endif
637 637
638/* FIXME!! need more doc */ 638/*
639 * Divide blocks started from @first with length @len into
640 * smaller chunks with power of 2 blocks.
641 * Clear the bits in bitmap which the blocks of the chunk(s) covered,
642 * then increase bb_counters[] for corresponded chunk size.
643 */
639static void ext4_mb_mark_free_simple(struct super_block *sb, 644static void ext4_mb_mark_free_simple(struct super_block *sb,
640 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, 645 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
641 struct ext4_group_info *grp) 646 struct ext4_group_info *grp)
@@ -2381,7 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2381 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte 2386 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2382 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. 2387 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2383 * So a two level scheme suffices for now. */ 2388 * So a two level scheme suffices for now. */
2384 sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); 2389 sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
2385 if (sbi->s_group_info == NULL) { 2390 if (sbi->s_group_info == NULL) {
2386 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); 2391 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
2387 return -ENOMEM; 2392 return -ENOMEM;
@@ -3208,7 +3213,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3208 cur_distance = abs(goal_block - cpa->pa_pstart); 3213 cur_distance = abs(goal_block - cpa->pa_pstart);
3209 new_distance = abs(goal_block - pa->pa_pstart); 3214 new_distance = abs(goal_block - pa->pa_pstart);
3210 3215
3211 if (cur_distance < new_distance) 3216 if (cur_distance <= new_distance)
3212 return cpa; 3217 return cpa;
3213 3218
3214 /* drop the previous reference */ 3219 /* drop the previous reference */
@@ -3907,7 +3912,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3907 struct super_block *sb = ac->ac_sb; 3912 struct super_block *sb = ac->ac_sb;
3908 ext4_group_t ngroups, i; 3913 ext4_group_t ngroups, i;
3909 3914
3910 if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) 3915 if (!mb_enable_debug ||
3916 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3911 return; 3917 return;
3912 3918
3913 printk(KERN_ERR "EXT4-fs: Can't allocate:" 3919 printk(KERN_ERR "EXT4-fs: Can't allocate:"
@@ -4753,7 +4759,8 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
4753 * bitmap. Then issue a TRIM command on this extent and free the extent in 4759 * bitmap. Then issue a TRIM command on this extent and free the extent in
4754 * the group buddy bitmap. This is done until whole group is scanned. 4760 * the group buddy bitmap. This is done until whole group is scanned.
4755 */ 4761 */
4756ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, 4762static ext4_grpblk_t
4763ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4757 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) 4764 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
4758{ 4765{
4759 void *bitmap; 4766 void *bitmap;
@@ -4863,10 +4870,15 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4863 break; 4870 break;
4864 } 4871 }
4865 4872
4866 if (len >= EXT4_BLOCKS_PER_GROUP(sb)) 4873 /*
4867 len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); 4874 * For all the groups except the last one, last block will
4868 else 4875 * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to
4876 * change it for the last group in which case start +
4877 * len < EXT4_BLOCKS_PER_GROUP(sb).
4878 */
4879 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
4869 last_block = first_block + len; 4880 last_block = first_block + len;
4881 len -= last_block - first_block;
4870 4882
4871 if (e4b.bd_info->bb_free >= minlen) { 4883 if (e4b.bd_info->bb_free >= minlen) {
4872 cnt = ext4_trim_all_free(sb, &e4b, first_block, 4884 cnt = ext4_trim_all_free(sb, &e4b, first_block,
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b619322c76f0..22bd4d7f289b 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -169,7 +169,7 @@ struct ext4_allocation_context {
169 /* original request */ 169 /* original request */
170 struct ext4_free_extent ac_o_ex; 170 struct ext4_free_extent ac_o_ex;
171 171
172 /* goal request (after normalization) */ 172 /* goal request (normalized ac_o_ex) */
173 struct ext4_free_extent ac_g_ex; 173 struct ext4_free_extent ac_g_ex;
174 174
175 /* the best found extent */ 175 /* the best found extent */
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b0a126f23c20..d1bafa57f483 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -263,7 +263,7 @@ static int free_dind_blocks(handle_t *handle,
263 for (i = 0; i < max_entries; i++) { 263 for (i = 0; i < max_entries; i++) {
264 if (tmp_idata[i]) { 264 if (tmp_idata[i]) {
265 extend_credit_for_blkdel(handle, inode); 265 extend_credit_for_blkdel(handle, inode);
266 ext4_free_blocks(handle, inode, 0, 266 ext4_free_blocks(handle, inode, NULL,
267 le32_to_cpu(tmp_idata[i]), 1, 267 le32_to_cpu(tmp_idata[i]), 1,
268 EXT4_FREE_BLOCKS_METADATA | 268 EXT4_FREE_BLOCKS_METADATA |
269 EXT4_FREE_BLOCKS_FORGET); 269 EXT4_FREE_BLOCKS_FORGET);
@@ -271,7 +271,7 @@ static int free_dind_blocks(handle_t *handle,
271 } 271 }
272 put_bh(bh); 272 put_bh(bh);
273 extend_credit_for_blkdel(handle, inode); 273 extend_credit_for_blkdel(handle, inode);
274 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, 274 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
275 EXT4_FREE_BLOCKS_METADATA | 275 EXT4_FREE_BLOCKS_METADATA |
276 EXT4_FREE_BLOCKS_FORGET); 276 EXT4_FREE_BLOCKS_FORGET);
277 return 0; 277 return 0;
@@ -302,7 +302,7 @@ static int free_tind_blocks(handle_t *handle,
302 } 302 }
303 put_bh(bh); 303 put_bh(bh);
304 extend_credit_for_blkdel(handle, inode); 304 extend_credit_for_blkdel(handle, inode);
305 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, 305 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
306 EXT4_FREE_BLOCKS_METADATA | 306 EXT4_FREE_BLOCKS_METADATA |
307 EXT4_FREE_BLOCKS_FORGET); 307 EXT4_FREE_BLOCKS_FORGET);
308 return 0; 308 return 0;
@@ -315,7 +315,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
315 /* ei->i_data[EXT4_IND_BLOCK] */ 315 /* ei->i_data[EXT4_IND_BLOCK] */
316 if (i_data[0]) { 316 if (i_data[0]) {
317 extend_credit_for_blkdel(handle, inode); 317 extend_credit_for_blkdel(handle, inode);
318 ext4_free_blocks(handle, inode, 0, 318 ext4_free_blocks(handle, inode, NULL,
319 le32_to_cpu(i_data[0]), 1, 319 le32_to_cpu(i_data[0]), 1,
320 EXT4_FREE_BLOCKS_METADATA | 320 EXT4_FREE_BLOCKS_METADATA |
321 EXT4_FREE_BLOCKS_FORGET); 321 EXT4_FREE_BLOCKS_FORGET);
@@ -428,7 +428,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
428 } 428 }
429 put_bh(bh); 429 put_bh(bh);
430 extend_credit_for_blkdel(handle, inode); 430 extend_credit_for_blkdel(handle, inode);
431 ext4_free_blocks(handle, inode, 0, block, 1, 431 ext4_free_blocks(handle, inode, NULL, block, 1,
432 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 432 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
433 return retval; 433 return retval;
434} 434}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e781b7ea5630..67fd0b025858 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -40,6 +40,7 @@
40#include "xattr.h" 40#include "xattr.h"
41#include "acl.h" 41#include "acl.h"
42 42
43#include <trace/events/ext4.h>
43/* 44/*
44 * define how far ahead to read directories while searching them. 45 * define how far ahead to read directories while searching them.
45 */ 46 */
@@ -2183,6 +2184,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2183 struct ext4_dir_entry_2 *de; 2184 struct ext4_dir_entry_2 *de;
2184 handle_t *handle; 2185 handle_t *handle;
2185 2186
2187 trace_ext4_unlink_enter(dir, dentry);
2186 /* Initialize quotas before so that eventual writes go 2188 /* Initialize quotas before so that eventual writes go
2187 * in separate transaction */ 2189 * in separate transaction */
2188 dquot_initialize(dir); 2190 dquot_initialize(dir);
@@ -2228,6 +2230,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2228end_unlink: 2230end_unlink:
2229 ext4_journal_stop(handle); 2231 ext4_journal_stop(handle);
2230 brelse(bh); 2232 brelse(bh);
2233 trace_ext4_unlink_exit(dentry, retval);
2231 return retval; 2234 return retval;
2232} 2235}
2233 2236
@@ -2402,6 +2405,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2402 if (!new_inode && new_dir != old_dir && 2405 if (!new_inode && new_dir != old_dir &&
2403 EXT4_DIR_LINK_MAX(new_dir)) 2406 EXT4_DIR_LINK_MAX(new_dir))
2404 goto end_rename; 2407 goto end_rename;
2408 BUFFER_TRACE(dir_bh, "get_write_access");
2409 retval = ext4_journal_get_write_access(handle, dir_bh);
2410 if (retval)
2411 goto end_rename;
2405 } 2412 }
2406 if (!new_bh) { 2413 if (!new_bh) {
2407 retval = ext4_add_entry(handle, new_dentry, old_inode); 2414 retval = ext4_add_entry(handle, new_dentry, old_inode);
@@ -2409,7 +2416,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2409 goto end_rename; 2416 goto end_rename;
2410 } else { 2417 } else {
2411 BUFFER_TRACE(new_bh, "get write access"); 2418 BUFFER_TRACE(new_bh, "get write access");
2412 ext4_journal_get_write_access(handle, new_bh); 2419 retval = ext4_journal_get_write_access(handle, new_bh);
2420 if (retval)
2421 goto end_rename;
2413 new_de->inode = cpu_to_le32(old_inode->i_ino); 2422 new_de->inode = cpu_to_le32(old_inode->i_ino);
2414 if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, 2423 if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
2415 EXT4_FEATURE_INCOMPAT_FILETYPE)) 2424 EXT4_FEATURE_INCOMPAT_FILETYPE))
@@ -2470,8 +2479,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2470 old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); 2479 old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
2471 ext4_update_dx_flag(old_dir); 2480 ext4_update_dx_flag(old_dir);
2472 if (dir_bh) { 2481 if (dir_bh) {
2473 BUFFER_TRACE(dir_bh, "get_write_access");
2474 ext4_journal_get_write_access(handle, dir_bh);
2475 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 2482 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2476 cpu_to_le32(new_dir->i_ino); 2483 cpu_to_le32(new_dir->i_ino);
2477 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2484 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 955cc309142f..b6dbd056fcb1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -259,6 +259,11 @@ static void ext4_end_bio(struct bio *bio, int error)
259 bi_sector >> (inode->i_blkbits - 9)); 259 bi_sector >> (inode->i_blkbits - 9));
260 } 260 }
261 261
262 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
263 ext4_free_io_end(io_end);
264 return;
265 }
266
262 /* Add the io_end to per-inode completed io list*/ 267 /* Add the io_end to per-inode completed io list*/
263 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 268 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
264 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); 269 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
@@ -279,9 +284,9 @@ void ext4_io_submit(struct ext4_io_submit *io)
279 BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); 284 BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
280 bio_put(io->io_bio); 285 bio_put(io->io_bio);
281 } 286 }
282 io->io_bio = 0; 287 io->io_bio = NULL;
283 io->io_op = 0; 288 io->io_op = 0;
284 io->io_end = 0; 289 io->io_end = NULL;
285} 290}
286 291
287static int io_submit_init(struct ext4_io_submit *io, 292static int io_submit_init(struct ext4_io_submit *io,
@@ -310,8 +315,7 @@ static int io_submit_init(struct ext4_io_submit *io,
310 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); 315 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
311 316
312 io->io_bio = bio; 317 io->io_bio = bio;
313 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? 318 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
314 WRITE_SYNC_PLUG : WRITE);
315 io->io_next_block = bh->b_blocknr; 319 io->io_next_block = bh->b_blocknr;
316 return 0; 320 return 0;
317} 321}
@@ -381,8 +385,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
381 385
382 BUG_ON(!PageLocked(page)); 386 BUG_ON(!PageLocked(page));
383 BUG_ON(PageWriteback(page)); 387 BUG_ON(PageWriteback(page));
384 set_page_writeback(page);
385 ClearPageError(page);
386 388
387 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); 389 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
388 if (!io_page) { 390 if (!io_page) {
@@ -393,6 +395,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
393 io_page->p_page = page; 395 io_page->p_page = page;
394 atomic_set(&io_page->p_count, 1); 396 atomic_set(&io_page->p_count, 1);
395 get_page(page); 397 get_page(page);
398 set_page_writeback(page);
399 ClearPageError(page);
396 400
397 for (bh = head = page_buffers(page), block_start = 0; 401 for (bh = head = page_buffers(page), block_start = 0;
398 bh != head || !block_start; 402 bh != head || !block_start;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3ecc6e45d2f9..80bbc9c60c24 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -230,7 +230,7 @@ static int setup_new_group_blocks(struct super_block *sb,
230 } 230 }
231 231
232 /* Zero out all of the reserved backup group descriptor table blocks */ 232 /* Zero out all of the reserved backup group descriptor table blocks */
233 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", 233 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
234 block, sbi->s_itb_per_group); 234 block, sbi->s_itb_per_group);
235 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, 235 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
236 GFP_NOFS); 236 GFP_NOFS);
@@ -248,7 +248,7 @@ static int setup_new_group_blocks(struct super_block *sb,
248 248
249 /* Zero out all of the inode table blocks */ 249 /* Zero out all of the inode table blocks */
250 block = input->inode_table; 250 block = input->inode_table;
251 ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", 251 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
252 block, sbi->s_itb_per_group); 252 block, sbi->s_itb_per_group);
253 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 253 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
254 if (err) 254 if (err)
@@ -499,12 +499,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
499 return err; 499 return err;
500 500
501exit_inode: 501exit_inode:
502 /* ext4_journal_release_buffer(handle, iloc.bh); */ 502 /* ext4_handle_release_buffer(handle, iloc.bh); */
503 brelse(iloc.bh); 503 brelse(iloc.bh);
504exit_dindj: 504exit_dindj:
505 /* ext4_journal_release_buffer(handle, dind); */ 505 /* ext4_handle_release_buffer(handle, dind); */
506exit_sbh: 506exit_sbh:
507 /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ 507 /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
508exit_dind: 508exit_dind:
509 brelse(dind); 509 brelse(dind);
510exit_bh: 510exit_bh:
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
586 /* 586 /*
587 int j; 587 int j;
588 for (j = 0; j < i; j++) 588 for (j = 0; j < i; j++)
589 ext4_journal_release_buffer(handle, primary[j]); 589 ext4_handle_release_buffer(handle, primary[j]);
590 */ 590 */
591 goto exit_bh; 591 goto exit_bh;
592 } 592 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 203f9e4a70be..22546ad7f0ae 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -54,9 +54,9 @@
54 54
55static struct proc_dir_entry *ext4_proc_root; 55static struct proc_dir_entry *ext4_proc_root;
56static struct kset *ext4_kset; 56static struct kset *ext4_kset;
57struct ext4_lazy_init *ext4_li_info; 57static struct ext4_lazy_init *ext4_li_info;
58struct mutex ext4_li_mtx; 58static struct mutex ext4_li_mtx;
59struct ext4_features *ext4_feat; 59static struct ext4_features *ext4_feat;
60 60
61static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 61static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
62 unsigned long journal_devnum); 62 unsigned long journal_devnum);
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb);
75static int ext4_freeze(struct super_block *sb); 75static int ext4_freeze(struct super_block *sb);
76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 76static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 77 const char *dev_name, void *data);
78static int ext4_feature_set_ok(struct super_block *sb, int readonly);
78static void ext4_destroy_lazyinit_thread(void); 79static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 80static void ext4_unregister_li_request(struct super_block *sb);
80static void ext4_clear_request_list(void); 81static void ext4_clear_request_list(void);
@@ -594,7 +595,7 @@ __acquires(bitlock)
594 595
595 vaf.fmt = fmt; 596 vaf.fmt = fmt;
596 vaf.va = &args; 597 vaf.va = &args;
597 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", 598 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
598 sb->s_id, function, line, grp); 599 sb->s_id, function, line, grp);
599 if (ino) 600 if (ino)
600 printk(KERN_CONT "inode %lu: ", ino); 601 printk(KERN_CONT "inode %lu: ", ino);
@@ -997,13 +998,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
997 if (test_opt(sb, OLDALLOC)) 998 if (test_opt(sb, OLDALLOC))
998 seq_puts(seq, ",oldalloc"); 999 seq_puts(seq, ",oldalloc");
999#ifdef CONFIG_EXT4_FS_XATTR 1000#ifdef CONFIG_EXT4_FS_XATTR
1000 if (test_opt(sb, XATTR_USER) && 1001 if (test_opt(sb, XATTR_USER))
1001 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
1002 seq_puts(seq, ",user_xattr"); 1002 seq_puts(seq, ",user_xattr");
1003 if (!test_opt(sb, XATTR_USER) && 1003 if (!test_opt(sb, XATTR_USER))
1004 (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
1005 seq_puts(seq, ",nouser_xattr"); 1004 seq_puts(seq, ",nouser_xattr");
1006 }
1007#endif 1005#endif
1008#ifdef CONFIG_EXT4_FS_POSIX_ACL 1006#ifdef CONFIG_EXT4_FS_POSIX_ACL
1009 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 1007 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
@@ -1041,8 +1039,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1041 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1039 !(def_mount_opts & EXT4_DEFM_NODELALLOC))
1042 seq_puts(seq, ",nodelalloc"); 1040 seq_puts(seq, ",nodelalloc");
1043 1041
1044 if (test_opt(sb, MBLK_IO_SUBMIT)) 1042 if (!test_opt(sb, MBLK_IO_SUBMIT))
1045 seq_puts(seq, ",mblk_io_submit"); 1043 seq_puts(seq, ",nomblk_io_submit");
1046 if (sbi->s_stripe) 1044 if (sbi->s_stripe)
1047 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1045 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
1048 /* 1046 /*
@@ -1451,7 +1449,7 @@ static int parse_options(char *options, struct super_block *sb,
1451 * Initialize args struct so we know whether arg was 1449 * Initialize args struct so we know whether arg was
1452 * found; some options take optional arguments. 1450 * found; some options take optional arguments.
1453 */ 1451 */
1454 args[0].to = args[0].from = 0; 1452 args[0].to = args[0].from = NULL;
1455 token = match_token(p, tokens, args); 1453 token = match_token(p, tokens, args);
1456 switch (token) { 1454 switch (token) {
1457 case Opt_bsd_df: 1455 case Opt_bsd_df:
@@ -1771,7 +1769,7 @@ set_qf_format:
1771 return 0; 1769 return 0;
1772 if (option < 0 || option > (1 << 30)) 1770 if (option < 0 || option > (1 << 30))
1773 return 0; 1771 return 0;
1774 if (!is_power_of_2(option)) { 1772 if (option && !is_power_of_2(option)) {
1775 ext4_msg(sb, KERN_ERR, 1773 ext4_msg(sb, KERN_ERR,
1776 "EXT4-fs: inode_readahead_blks" 1774 "EXT4-fs: inode_readahead_blks"
1777 " must be a power of 2"); 1775 " must be a power of 2");
@@ -2120,6 +2118,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2120 return; 2118 return;
2121 } 2119 }
2122 2120
2121 /* Check if feature set would not allow a r/w mount */
2122 if (!ext4_feature_set_ok(sb, 0)) {
2123 ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2124 "unknown ROCOMPAT features");
2125 return;
2126 }
2127
2123 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2128 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2124 if (es->s_last_orphan) 2129 if (es->s_last_orphan)
2125 jbd_debug(1, "Errors on filesystem, " 2130 jbd_debug(1, "Errors on filesystem, "
@@ -2412,7 +2417,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2412 if (parse_strtoul(buf, 0x40000000, &t)) 2417 if (parse_strtoul(buf, 0x40000000, &t))
2413 return -EINVAL; 2418 return -EINVAL;
2414 2419
2415 if (!is_power_of_2(t)) 2420 if (t && !is_power_of_2(t))
2416 return -EINVAL; 2421 return -EINVAL;
2417 2422
2418 sbi->s_inode_readahead_blks = t; 2423 sbi->s_inode_readahead_blks = t;
@@ -3095,14 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3095 } 3100 }
3096 if (def_mount_opts & EXT4_DEFM_UID16) 3101 if (def_mount_opts & EXT4_DEFM_UID16)
3097 set_opt(sb, NO_UID32); 3102 set_opt(sb, NO_UID32);
3103 /* xattr user namespace & acls are now defaulted on */
3098#ifdef CONFIG_EXT4_FS_XATTR 3104#ifdef CONFIG_EXT4_FS_XATTR
3099 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 3105 set_opt(sb, XATTR_USER);
3100 set_opt(sb, XATTR_USER);
3101#endif 3106#endif
3102#ifdef CONFIG_EXT4_FS_POSIX_ACL 3107#ifdef CONFIG_EXT4_FS_POSIX_ACL
3103 if (def_mount_opts & EXT4_DEFM_ACL) 3108 set_opt(sb, POSIX_ACL);
3104 set_opt(sb, POSIX_ACL);
3105#endif 3109#endif
3110 set_opt(sb, MBLK_IO_SUBMIT);
3106 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3111 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3107 set_opt(sb, JOURNAL_DATA); 3112 set_opt(sb, JOURNAL_DATA);
3108 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3113 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3516,7 +3521,7 @@ no_journal:
3516 * concurrency isn't really necessary. Limit it to 1. 3521 * concurrency isn't really necessary. Limit it to 1.
3517 */ 3522 */
3518 EXT4_SB(sb)->dio_unwritten_wq = 3523 EXT4_SB(sb)->dio_unwritten_wq =
3519 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1); 3524 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
3520 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3525 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3521 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3526 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3522 goto failed_mount_wq; 3527 goto failed_mount_wq;
@@ -3531,17 +3536,16 @@ no_journal:
3531 if (IS_ERR(root)) { 3536 if (IS_ERR(root)) {
3532 ext4_msg(sb, KERN_ERR, "get root inode failed"); 3537 ext4_msg(sb, KERN_ERR, "get root inode failed");
3533 ret = PTR_ERR(root); 3538 ret = PTR_ERR(root);
3539 root = NULL;
3534 goto failed_mount4; 3540 goto failed_mount4;
3535 } 3541 }
3536 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 3542 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
3537 iput(root);
3538 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); 3543 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
3539 goto failed_mount4; 3544 goto failed_mount4;
3540 } 3545 }
3541 sb->s_root = d_alloc_root(root); 3546 sb->s_root = d_alloc_root(root);
3542 if (!sb->s_root) { 3547 if (!sb->s_root) {
3543 ext4_msg(sb, KERN_ERR, "get root dentry failed"); 3548 ext4_msg(sb, KERN_ERR, "get root dentry failed");
3544 iput(root);
3545 ret = -ENOMEM; 3549 ret = -ENOMEM;
3546 goto failed_mount4; 3550 goto failed_mount4;
3547 } 3551 }
@@ -3657,6 +3661,8 @@ cantfind_ext4:
3657 goto failed_mount; 3661 goto failed_mount;
3658 3662
3659failed_mount4: 3663failed_mount4:
3664 iput(root);
3665 sb->s_root = NULL;
3660 ext4_msg(sb, KERN_ERR, "mount failed"); 3666 ext4_msg(sb, KERN_ERR, "mount failed");
3661 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3667 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
3662failed_mount_wq: 3668failed_mount_wq:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fc32176eee39..b545ca1c459c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -735,7 +735,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
735 int offset = (char *)s->here - bs->bh->b_data; 735 int offset = (char *)s->here - bs->bh->b_data;
736 736
737 unlock_buffer(bs->bh); 737 unlock_buffer(bs->bh);
738 jbd2_journal_release_buffer(handle, bs->bh); 738 ext4_handle_release_buffer(handle, bs->bh);
739 if (ce) { 739 if (ce) {
740 mb_cache_entry_release(ce); 740 mb_cache_entry_release(ce);
741 ce = NULL; 741 ce = NULL;
@@ -833,7 +833,7 @@ inserted:
833 new_bh = sb_getblk(sb, block); 833 new_bh = sb_getblk(sb, block);
834 if (!new_bh) { 834 if (!new_bh) {
835getblk_failed: 835getblk_failed:
836 ext4_free_blocks(handle, inode, 0, block, 1, 836 ext4_free_blocks(handle, inode, NULL, block, 1,
837 EXT4_FREE_BLOCKS_METADATA); 837 EXT4_FREE_BLOCKS_METADATA);
838 error = -EIO; 838 error = -EIO;
839 goto cleanup; 839 goto cleanup;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0e277ec4b612..8d68690bdcf1 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -236,7 +236,6 @@ static const struct address_space_operations fat_aops = {
236 .readpages = fat_readpages, 236 .readpages = fat_readpages,
237 .writepage = fat_writepage, 237 .writepage = fat_writepage,
238 .writepages = fat_writepages, 238 .writepages = fat_writepages,
239 .sync_page = block_sync_page,
240 .write_begin = fat_write_begin, 239 .write_begin = fat_write_begin,
241 .write_end = fat_write_end, 240 .write_end = fat_write_end,
242 .direct_IO = fat_direct_IO, 241 .direct_IO = fat_direct_IO,
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6c82e5bac039..22764c7c8382 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -159,7 +159,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
159 159
160 /* O_NOATIME can only be set by the owner or superuser */ 160 /* O_NOATIME can only be set by the owner or superuser */
161 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) 161 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
162 if (!is_owner_or_cap(inode)) 162 if (!inode_owner_or_capable(inode))
163 return -EPERM; 163 return -EPERM;
164 164
165 /* required for strict SunOS emulation */ 165 /* required for strict SunOS emulation */
diff --git a/fs/fifo.c b/fs/fifo.c
index 4e303c22d5ee..b1a524d798e7 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -66,8 +66,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
66 /* suppress POLLHUP until we have 66 /* suppress POLLHUP until we have
67 * seen a writer */ 67 * seen a writer */
68 filp->f_version = pipe->w_counter; 68 filp->f_version = pipe->w_counter;
69 } else 69 } else {
70 {
71 wait_for_partner(inode, &pipe->w_counter); 70 wait_for_partner(inode, &pipe->w_counter);
72 if(signal_pending(current)) 71 if(signal_pending(current))
73 goto err_rd; 72 goto err_rd;
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 1429f3ae1e86..5d318c44f855 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -44,7 +44,6 @@ static sector_t vxfs_bmap(struct address_space *, sector_t);
44const struct address_space_operations vxfs_aops = { 44const struct address_space_operations vxfs_aops = {
45 .readpage = vxfs_readpage, 45 .readpage = vxfs_readpage,
46 .bmap = vxfs_bmap, 46 .bmap = vxfs_bmap,
47 .sync_page = block_sync_page,
48}; 47};
49 48
50inline void 49inline void
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 59c6e4956786..b5ed541fb137 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
176} 176}
177 177
178/* 178/*
179 * Remove the inode from the writeback list it is on.
180 */
181void inode_wb_list_del(struct inode *inode)
182{
183 spin_lock(&inode_wb_list_lock);
184 list_del_init(&inode->i_wb_list);
185 spin_unlock(&inode_wb_list_lock);
186}
187
188
189/*
179 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the 190 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
180 * furthest end of its superblock's dirty-inode list. 191 * furthest end of its superblock's dirty-inode list.
181 * 192 *
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode)
188{ 199{
189 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 200 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
190 201
202 assert_spin_locked(&inode_wb_list_lock);
191 if (!list_empty(&wb->b_dirty)) { 203 if (!list_empty(&wb->b_dirty)) {
192 struct inode *tail; 204 struct inode *tail;
193 205
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode)
205{ 217{
206 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 218 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
207 219
220 assert_spin_locked(&inode_wb_list_lock);
208 list_move(&inode->i_wb_list, &wb->b_more_io); 221 list_move(&inode->i_wb_list, &wb->b_more_io);
209} 222}
210 223
211static void inode_sync_complete(struct inode *inode) 224static void inode_sync_complete(struct inode *inode)
212{ 225{
213 /* 226 /*
214 * Prevent speculative execution through spin_unlock(&inode_lock); 227 * Prevent speculative execution through
228 * spin_unlock(&inode_wb_list_lock);
215 */ 229 */
230
216 smp_mb(); 231 smp_mb();
217 wake_up_bit(&inode->i_state, __I_SYNC); 232 wake_up_bit(&inode->i_state, __I_SYNC);
218} 233}
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
286 */ 301 */
287static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) 302static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
288{ 303{
304 assert_spin_locked(&inode_wb_list_lock);
289 list_splice_init(&wb->b_more_io, &wb->b_io); 305 list_splice_init(&wb->b_more_io, &wb->b_io);
290 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); 306 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
291} 307}
@@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode)
306 wait_queue_head_t *wqh; 322 wait_queue_head_t *wqh;
307 323
308 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 324 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
309 while (inode->i_state & I_SYNC) { 325 while (inode->i_state & I_SYNC) {
310 spin_unlock(&inode_lock); 326 spin_unlock(&inode->i_lock);
327 spin_unlock(&inode_wb_list_lock);
311 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 328 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
312 spin_lock(&inode_lock); 329 spin_lock(&inode_wb_list_lock);
330 spin_lock(&inode->i_lock);
313 } 331 }
314} 332}
315 333
316/* 334/*
317 * Write out an inode's dirty pages. Called under inode_lock. Either the 335 * Write out an inode's dirty pages. Called under inode_wb_list_lock and
318 * caller has ref on the inode (either via __iget or via syscall against an fd) 336 * inode->i_lock. Either the caller has an active reference on the inode or
319 * or the inode has I_WILL_FREE set (via generic_forget_inode) 337 * the inode has I_WILL_FREE set.
320 * 338 *
321 * If `wait' is set, wait on the writeout. 339 * If `wait' is set, wait on the writeout.
322 * 340 *
323 * The whole writeout design is quite complex and fragile. We want to avoid 341 * The whole writeout design is quite complex and fragile. We want to avoid
324 * starvation of particular inodes when others are being redirtied, prevent 342 * starvation of particular inodes when others are being redirtied, prevent
325 * livelocks, etc. 343 * livelocks, etc.
326 *
327 * Called under inode_lock.
328 */ 344 */
329static int 345static int
330writeback_single_inode(struct inode *inode, struct writeback_control *wbc) 346writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
333 unsigned dirty; 349 unsigned dirty;
334 int ret; 350 int ret;
335 351
352 assert_spin_locked(&inode_wb_list_lock);
353 assert_spin_locked(&inode->i_lock);
354
336 if (!atomic_read(&inode->i_count)) 355 if (!atomic_read(&inode->i_count))
337 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); 356 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
338 else 357 else
@@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
363 /* Set I_SYNC, reset I_DIRTY_PAGES */ 382 /* Set I_SYNC, reset I_DIRTY_PAGES */
364 inode->i_state |= I_SYNC; 383 inode->i_state |= I_SYNC;
365 inode->i_state &= ~I_DIRTY_PAGES; 384 inode->i_state &= ~I_DIRTY_PAGES;
366 spin_unlock(&inode_lock); 385 spin_unlock(&inode->i_lock);
386 spin_unlock(&inode_wb_list_lock);
367 387
368 ret = do_writepages(mapping, wbc); 388 ret = do_writepages(mapping, wbc);
369 389
@@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
383 * due to delalloc, clear dirty metadata flags right before 403 * due to delalloc, clear dirty metadata flags right before
384 * write_inode() 404 * write_inode()
385 */ 405 */
386 spin_lock(&inode_lock); 406 spin_lock(&inode->i_lock);
387 dirty = inode->i_state & I_DIRTY; 407 dirty = inode->i_state & I_DIRTY;
388 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 408 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
389 spin_unlock(&inode_lock); 409 spin_unlock(&inode->i_lock);
390 /* Don't write the inode if only I_DIRTY_PAGES was set */ 410 /* Don't write the inode if only I_DIRTY_PAGES was set */
391 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 411 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
392 int err = write_inode(inode, wbc); 412 int err = write_inode(inode, wbc);
@@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
394 ret = err; 414 ret = err;
395 } 415 }
396 416
397 spin_lock(&inode_lock); 417 spin_lock(&inode_wb_list_lock);
418 spin_lock(&inode->i_lock);
398 inode->i_state &= ~I_SYNC; 419 inode->i_state &= ~I_SYNC;
399 if (!(inode->i_state & I_FREEING)) { 420 if (!(inode->i_state & I_FREEING)) {
400 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 421 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
506 * kind does not need peridic writeout yet, and for the latter 527 * kind does not need peridic writeout yet, and for the latter
507 * kind writeout is handled by the freer. 528 * kind writeout is handled by the freer.
508 */ 529 */
530 spin_lock(&inode->i_lock);
509 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { 531 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
532 spin_unlock(&inode->i_lock);
510 requeue_io(inode); 533 requeue_io(inode);
511 continue; 534 continue;
512 } 535 }
@@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
515 * Was this inode dirtied after sync_sb_inodes was called? 538 * Was this inode dirtied after sync_sb_inodes was called?
516 * This keeps sync from extra jobs and livelock. 539 * This keeps sync from extra jobs and livelock.
517 */ 540 */
518 if (inode_dirtied_after(inode, wbc->wb_start)) 541 if (inode_dirtied_after(inode, wbc->wb_start)) {
542 spin_unlock(&inode->i_lock);
519 return 1; 543 return 1;
544 }
520 545
521 __iget(inode); 546 __iget(inode);
547
522 pages_skipped = wbc->pages_skipped; 548 pages_skipped = wbc->pages_skipped;
523 writeback_single_inode(inode, wbc); 549 writeback_single_inode(inode, wbc);
524 if (wbc->pages_skipped != pages_skipped) { 550 if (wbc->pages_skipped != pages_skipped) {
@@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
528 */ 554 */
529 redirty_tail(inode); 555 redirty_tail(inode);
530 } 556 }
531 spin_unlock(&inode_lock); 557 spin_unlock(&inode->i_lock);
558 spin_unlock(&inode_wb_list_lock);
532 iput(inode); 559 iput(inode);
533 cond_resched(); 560 cond_resched();
534 spin_lock(&inode_lock); 561 spin_lock(&inode_wb_list_lock);
535 if (wbc->nr_to_write <= 0) { 562 if (wbc->nr_to_write <= 0) {
536 wbc->more_io = 1; 563 wbc->more_io = 1;
537 return 1; 564 return 1;
@@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
550 577
551 if (!wbc->wb_start) 578 if (!wbc->wb_start)
552 wbc->wb_start = jiffies; /* livelock avoidance */ 579 wbc->wb_start = jiffies; /* livelock avoidance */
553 spin_lock(&inode_lock); 580 spin_lock(&inode_wb_list_lock);
554 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 581 if (!wbc->for_kupdate || list_empty(&wb->b_io))
555 queue_io(wb, wbc->older_than_this); 582 queue_io(wb, wbc->older_than_this);
556 583
@@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
568 if (ret) 595 if (ret)
569 break; 596 break;
570 } 597 }
571 spin_unlock(&inode_lock); 598 spin_unlock(&inode_wb_list_lock);
572 /* Leave any unwritten inodes on b_io */ 599 /* Leave any unwritten inodes on b_io */
573} 600}
574 601
@@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
577{ 604{
578 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 605 WARN_ON(!rwsem_is_locked(&sb->s_umount));
579 606
580 spin_lock(&inode_lock); 607 spin_lock(&inode_wb_list_lock);
581 if (!wbc->for_kupdate || list_empty(&wb->b_io)) 608 if (!wbc->for_kupdate || list_empty(&wb->b_io))
582 queue_io(wb, wbc->older_than_this); 609 queue_io(wb, wbc->older_than_this);
583 writeback_sb_inodes(sb, wb, wbc, true); 610 writeback_sb_inodes(sb, wb, wbc, true);
584 spin_unlock(&inode_lock); 611 spin_unlock(&inode_wb_list_lock);
585} 612}
586 613
587/* 614/*
@@ -720,13 +747,15 @@ static long wb_writeback(struct bdi_writeback *wb,
720 * become available for writeback. Otherwise 747 * become available for writeback. Otherwise
721 * we'll just busyloop. 748 * we'll just busyloop.
722 */ 749 */
723 spin_lock(&inode_lock); 750 spin_lock(&inode_wb_list_lock);
724 if (!list_empty(&wb->b_more_io)) { 751 if (!list_empty(&wb->b_more_io)) {
725 inode = wb_inode(wb->b_more_io.prev); 752 inode = wb_inode(wb->b_more_io.prev);
726 trace_wbc_writeback_wait(&wbc, wb->bdi); 753 trace_wbc_writeback_wait(&wbc, wb->bdi);
754 spin_lock(&inode->i_lock);
727 inode_wait_for_writeback(inode); 755 inode_wait_for_writeback(inode);
756 spin_unlock(&inode->i_lock);
728 } 757 }
729 spin_unlock(&inode_lock); 758 spin_unlock(&inode_wb_list_lock);
730 } 759 }
731 760
732 return wrote; 761 return wrote;
@@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
992{ 1021{
993 struct super_block *sb = inode->i_sb; 1022 struct super_block *sb = inode->i_sb;
994 struct backing_dev_info *bdi = NULL; 1023 struct backing_dev_info *bdi = NULL;
995 bool wakeup_bdi = false;
996 1024
997 /* 1025 /*
998 * Don't do this for I_DIRTY_PAGES - that doesn't actually 1026 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1016 if (unlikely(block_dump)) 1044 if (unlikely(block_dump))
1017 block_dump___mark_inode_dirty(inode); 1045 block_dump___mark_inode_dirty(inode);
1018 1046
1019 spin_lock(&inode_lock); 1047 spin_lock(&inode->i_lock);
1020 if ((inode->i_state & flags) != flags) { 1048 if ((inode->i_state & flags) != flags) {
1021 const int was_dirty = inode->i_state & I_DIRTY; 1049 const int was_dirty = inode->i_state & I_DIRTY;
1022 1050
@@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1028 * superblock list, based upon its state. 1056 * superblock list, based upon its state.
1029 */ 1057 */
1030 if (inode->i_state & I_SYNC) 1058 if (inode->i_state & I_SYNC)
1031 goto out; 1059 goto out_unlock_inode;
1032 1060
1033 /* 1061 /*
1034 * Only add valid (hashed) inodes to the superblock's 1062 * Only add valid (hashed) inodes to the superblock's
@@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1036 */ 1064 */
1037 if (!S_ISBLK(inode->i_mode)) { 1065 if (!S_ISBLK(inode->i_mode)) {
1038 if (inode_unhashed(inode)) 1066 if (inode_unhashed(inode))
1039 goto out; 1067 goto out_unlock_inode;
1040 } 1068 }
1041 if (inode->i_state & I_FREEING) 1069 if (inode->i_state & I_FREEING)
1042 goto out; 1070 goto out_unlock_inode;
1043 1071
1044 /* 1072 /*
1045 * If the inode was already on b_dirty/b_io/b_more_io, don't 1073 * If the inode was already on b_dirty/b_io/b_more_io, don't
1046 * reposition it (that would break b_dirty time-ordering). 1074 * reposition it (that would break b_dirty time-ordering).
1047 */ 1075 */
1048 if (!was_dirty) { 1076 if (!was_dirty) {
1077 bool wakeup_bdi = false;
1049 bdi = inode_to_bdi(inode); 1078 bdi = inode_to_bdi(inode);
1050 1079
1051 if (bdi_cap_writeback_dirty(bdi)) { 1080 if (bdi_cap_writeback_dirty(bdi)) {
@@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1062 wakeup_bdi = true; 1091 wakeup_bdi = true;
1063 } 1092 }
1064 1093
1094 spin_unlock(&inode->i_lock);
1095 spin_lock(&inode_wb_list_lock);
1065 inode->dirtied_when = jiffies; 1096 inode->dirtied_when = jiffies;
1066 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1097 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1098 spin_unlock(&inode_wb_list_lock);
1099
1100 if (wakeup_bdi)
1101 bdi_wakeup_thread_delayed(bdi);
1102 return;
1067 } 1103 }
1068 } 1104 }
1069out: 1105out_unlock_inode:
1070 spin_unlock(&inode_lock); 1106 spin_unlock(&inode->i_lock);
1071 1107
1072 if (wakeup_bdi)
1073 bdi_wakeup_thread_delayed(bdi);
1074} 1108}
1075EXPORT_SYMBOL(__mark_inode_dirty); 1109EXPORT_SYMBOL(__mark_inode_dirty);
1076 1110
@@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb)
1101 */ 1135 */
1102 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1136 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1103 1137
1104 spin_lock(&inode_lock); 1138 spin_lock(&inode_sb_list_lock);
1105 1139
1106 /* 1140 /*
1107 * Data integrity sync. Must wait for all pages under writeback, 1141 * Data integrity sync. Must wait for all pages under writeback,
@@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb)
1111 * we still have to wait for that writeout. 1145 * we still have to wait for that writeout.
1112 */ 1146 */
1113 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1147 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1114 struct address_space *mapping; 1148 struct address_space *mapping = inode->i_mapping;
1115 1149
1116 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 1150 spin_lock(&inode->i_lock);
1117 continue; 1151 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
1118 mapping = inode->i_mapping; 1152 (mapping->nrpages == 0)) {
1119 if (mapping->nrpages == 0) 1153 spin_unlock(&inode->i_lock);
1120 continue; 1154 continue;
1155 }
1121 __iget(inode); 1156 __iget(inode);
1122 spin_unlock(&inode_lock); 1157 spin_unlock(&inode->i_lock);
1158 spin_unlock(&inode_sb_list_lock);
1159
1123 /* 1160 /*
1124 * We hold a reference to 'inode' so it couldn't have 1161 * We hold a reference to 'inode' so it couldn't have been
1125 * been removed from s_inodes list while we dropped the 1162 * removed from s_inodes list while we dropped the
1126 * inode_lock. We cannot iput the inode now as we can 1163 * inode_sb_list_lock. We cannot iput the inode now as we can
1127 * be holding the last reference and we cannot iput it 1164 * be holding the last reference and we cannot iput it under
1128 * under inode_lock. So we keep the reference and iput 1165 * inode_sb_list_lock. So we keep the reference and iput it
1129 * it later. 1166 * later.
1130 */ 1167 */
1131 iput(old_inode); 1168 iput(old_inode);
1132 old_inode = inode; 1169 old_inode = inode;
@@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb)
1135 1172
1136 cond_resched(); 1173 cond_resched();
1137 1174
1138 spin_lock(&inode_lock); 1175 spin_lock(&inode_sb_list_lock);
1139 } 1176 }
1140 spin_unlock(&inode_lock); 1177 spin_unlock(&inode_sb_list_lock);
1141 iput(old_inode); 1178 iput(old_inode);
1142} 1179}
1143 1180
@@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync)
1271 wbc.nr_to_write = 0; 1308 wbc.nr_to_write = 0;
1272 1309
1273 might_sleep(); 1310 might_sleep();
1274 spin_lock(&inode_lock); 1311 spin_lock(&inode_wb_list_lock);
1312 spin_lock(&inode->i_lock);
1275 ret = writeback_single_inode(inode, &wbc); 1313 ret = writeback_single_inode(inode, &wbc);
1276 spin_unlock(&inode_lock); 1314 spin_unlock(&inode->i_lock);
1315 spin_unlock(&inode_wb_list_lock);
1277 if (sync) 1316 if (sync)
1278 inode_sync_wait(inode); 1317 inode_sync_wait(inode);
1279 return ret; 1318 return ret;
@@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
1295{ 1334{
1296 int ret; 1335 int ret;
1297 1336
1298 spin_lock(&inode_lock); 1337 spin_lock(&inode_wb_list_lock);
1338 spin_lock(&inode->i_lock);
1299 ret = writeback_single_inode(inode, wbc); 1339 ret = writeback_single_inode(inode, wbc);
1300 spin_unlock(&inode_lock); 1340 spin_unlock(&inode->i_lock);
1341 spin_unlock(&inode_wb_list_lock);
1301 return ret; 1342 return ret;
1302} 1343}
1303EXPORT_SYMBOL(sync_inode); 1344EXPORT_SYMBOL(sync_inode);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 7c39b885f969..b6cca47f7b07 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -305,7 +305,7 @@ static void cuse_gendev_release(struct device *dev)
305static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 305static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
306{ 306{
307 struct cuse_conn *cc = fc_to_cc(fc); 307 struct cuse_conn *cc = fc_to_cc(fc);
308 struct cuse_init_out *arg = &req->misc.cuse_init_out; 308 struct cuse_init_out *arg = req->out.args[0].value;
309 struct page *page = req->pages[0]; 309 struct page *page = req->pages[0];
310 struct cuse_devinfo devinfo = { }; 310 struct cuse_devinfo devinfo = { };
311 struct device *dev; 311 struct device *dev;
@@ -384,6 +384,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
384 dev_set_uevent_suppress(dev, 0); 384 dev_set_uevent_suppress(dev, 0);
385 kobject_uevent(&dev->kobj, KOBJ_ADD); 385 kobject_uevent(&dev->kobj, KOBJ_ADD);
386out: 386out:
387 kfree(arg);
387 __free_page(page); 388 __free_page(page);
388 return; 389 return;
389 390
@@ -405,6 +406,7 @@ static int cuse_send_init(struct cuse_conn *cc)
405 struct page *page; 406 struct page *page;
406 struct fuse_conn *fc = &cc->fc; 407 struct fuse_conn *fc = &cc->fc;
407 struct cuse_init_in *arg; 408 struct cuse_init_in *arg;
409 void *outarg;
408 410
409 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); 411 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
410 412
@@ -419,6 +421,10 @@ static int cuse_send_init(struct cuse_conn *cc)
419 if (!page) 421 if (!page)
420 goto err_put_req; 422 goto err_put_req;
421 423
424 outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
425 if (!outarg)
426 goto err_free_page;
427
422 arg = &req->misc.cuse_init_in; 428 arg = &req->misc.cuse_init_in;
423 arg->major = FUSE_KERNEL_VERSION; 429 arg->major = FUSE_KERNEL_VERSION;
424 arg->minor = FUSE_KERNEL_MINOR_VERSION; 430 arg->minor = FUSE_KERNEL_MINOR_VERSION;
@@ -429,7 +435,7 @@ static int cuse_send_init(struct cuse_conn *cc)
429 req->in.args[0].value = arg; 435 req->in.args[0].value = arg;
430 req->out.numargs = 2; 436 req->out.numargs = 2;
431 req->out.args[0].size = sizeof(struct cuse_init_out); 437 req->out.args[0].size = sizeof(struct cuse_init_out);
432 req->out.args[0].value = &req->misc.cuse_init_out; 438 req->out.args[0].value = outarg;
433 req->out.args[1].size = CUSE_INIT_INFO_MAX; 439 req->out.args[1].size = CUSE_INIT_INFO_MAX;
434 req->out.argvar = 1; 440 req->out.argvar = 1;
435 req->out.argpages = 1; 441 req->out.argpages = 1;
@@ -440,6 +446,8 @@ static int cuse_send_init(struct cuse_conn *cc)
440 446
441 return 0; 447 return 0;
442 448
449err_free_page:
450 __free_page(page);
443err_put_req: 451err_put_req:
444 fuse_put_request(fc, req); 452 fuse_put_request(fc, req);
445err: 453err:
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cf8d28d1fbad..640fc229df10 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -737,14 +737,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
737 if (WARN_ON(PageMlocked(oldpage))) 737 if (WARN_ON(PageMlocked(oldpage)))
738 goto out_fallback_unlock; 738 goto out_fallback_unlock;
739 739
740 remove_from_page_cache(oldpage); 740 err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
741 page_cache_release(oldpage);
742
743 err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
744 if (err) { 741 if (err) {
745 printk(KERN_WARNING "fuse_try_move_page: failed to add page"); 742 unlock_page(newpage);
746 goto out_fallback_unlock; 743 return err;
747 } 744 }
745
748 page_cache_get(newpage); 746 page_cache_get(newpage);
749 747
750 if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 748 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
@@ -1910,6 +1908,21 @@ __acquires(fc->lock)
1910 kfree(dequeue_forget(fc, 1, NULL)); 1908 kfree(dequeue_forget(fc, 1, NULL));
1911} 1909}
1912 1910
1911static void end_polls(struct fuse_conn *fc)
1912{
1913 struct rb_node *p;
1914
1915 p = rb_first(&fc->polled_files);
1916
1917 while (p) {
1918 struct fuse_file *ff;
1919 ff = rb_entry(p, struct fuse_file, polled_node);
1920 wake_up_interruptible_all(&ff->poll_wait);
1921
1922 p = rb_next(p);
1923 }
1924}
1925
1913/* 1926/*
1914 * Abort all requests. 1927 * Abort all requests.
1915 * 1928 *
@@ -1937,6 +1950,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
1937 fc->blocked = 0; 1950 fc->blocked = 0;
1938 end_io_requests(fc); 1951 end_io_requests(fc);
1939 end_queued_requests(fc); 1952 end_queued_requests(fc);
1953 end_polls(fc);
1940 wake_up_all(&fc->waitq); 1954 wake_up_all(&fc->waitq);
1941 wake_up_all(&fc->blocked_waitq); 1955 wake_up_all(&fc->blocked_waitq);
1942 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 1956 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1953,6 +1967,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
1953 fc->connected = 0; 1967 fc->connected = 0;
1954 fc->blocked = 0; 1968 fc->blocked = 0;
1955 end_queued_requests(fc); 1969 end_queued_requests(fc);
1970 end_polls(fc);
1956 wake_up_all(&fc->blocked_waitq); 1971 wake_up_all(&fc->blocked_waitq);
1957 spin_unlock(&fc->lock); 1972 spin_unlock(&fc->lock);
1958 fuse_conn_put(fc); 1973 fuse_conn_put(fc);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8bd0ef9286c3..c6ba49bd95b3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,10 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (nd && nd->flags & LOOKUP_RCU) 161 inode = ACCESS_ONCE(entry->d_inode);
162 return -ECHILD;
163
164 inode = entry->d_inode;
165 if (inode && is_bad_inode(inode)) 162 if (inode && is_bad_inode(inode))
166 return 0; 163 return 0;
167 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 164 else if (fuse_dentry_time(entry) < get_jiffies_64()) {
@@ -177,6 +174,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
177 if (!inode) 174 if (!inode)
178 return 0; 175 return 0;
179 176
177 if (nd->flags & LOOKUP_RCU)
178 return -ECHILD;
179
180 fc = get_fuse_conn(inode); 180 fc = get_fuse_conn(inode);
181 req = fuse_get_req(fc); 181 req = fuse_get_req(fc);
182 if (IS_ERR(req)) 182 if (IS_ERR(req))
@@ -970,6 +970,14 @@ static int fuse_access(struct inode *inode, int mask)
970 return err; 970 return err;
971} 971}
972 972
973static int fuse_perm_getattr(struct inode *inode, int flags)
974{
975 if (flags & IPERM_FLAG_RCU)
976 return -ECHILD;
977
978 return fuse_do_getattr(inode, NULL, NULL);
979}
980
973/* 981/*
974 * Check permission. The two basic access models of FUSE are: 982 * Check permission. The two basic access models of FUSE are:
975 * 983 *
@@ -989,9 +997,6 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
989 bool refreshed = false; 997 bool refreshed = false;
990 int err = 0; 998 int err = 0;
991 999
992 if (flags & IPERM_FLAG_RCU)
993 return -ECHILD;
994
995 if (!fuse_allow_task(fc, current)) 1000 if (!fuse_allow_task(fc, current))
996 return -EACCES; 1001 return -EACCES;
997 1002
@@ -1000,9 +1005,15 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1000 */ 1005 */
1001 if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) || 1006 if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
1002 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1007 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1003 err = fuse_update_attributes(inode, NULL, NULL, &refreshed); 1008 struct fuse_inode *fi = get_fuse_inode(inode);
1004 if (err) 1009
1005 return err; 1010 if (fi->i_time < get_jiffies_64()) {
1011 refreshed = true;
1012
1013 err = fuse_perm_getattr(inode, flags);
1014 if (err)
1015 return err;
1016 }
1006 } 1017 }
1007 1018
1008 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1019 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
@@ -1012,7 +1023,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1012 attributes. This is also needed, because the root 1023 attributes. This is also needed, because the root
1013 node will at first have no permissions */ 1024 node will at first have no permissions */
1014 if (err == -EACCES && !refreshed) { 1025 if (err == -EACCES && !refreshed) {
1015 err = fuse_do_getattr(inode, NULL, NULL); 1026 err = fuse_perm_getattr(inode, flags);
1016 if (!err) 1027 if (!err)
1017 err = generic_permission(inode, mask, 1028 err = generic_permission(inode, mask,
1018 flags, NULL); 1029 flags, NULL);
@@ -1023,13 +1034,16 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1023 noticed immediately, only after the attribute 1034 noticed immediately, only after the attribute
1024 timeout has expired */ 1035 timeout has expired */
1025 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1036 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1037 if (flags & IPERM_FLAG_RCU)
1038 return -ECHILD;
1039
1026 err = fuse_access(inode, mask); 1040 err = fuse_access(inode, mask);
1027 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1041 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1028 if (!(inode->i_mode & S_IXUGO)) { 1042 if (!(inode->i_mode & S_IXUGO)) {
1029 if (refreshed) 1043 if (refreshed)
1030 return -EACCES; 1044 return -EACCES;
1031 1045
1032 err = fuse_do_getattr(inode, NULL, NULL); 1046 err = fuse_perm_getattr(inode, flags);
1033 if (!err && !(inode->i_mode & S_IXUGO)) 1047 if (!err && !(inode->i_mode & S_IXUGO))
1034 return -EACCES; 1048 return -EACCES;
1035 } 1049 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9e0832dbb1e3..6ea00734984e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -222,7 +222,7 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
222 rb_erase(&ff->polled_node, &fc->polled_files); 222 rb_erase(&ff->polled_node, &fc->polled_files);
223 spin_unlock(&fc->lock); 223 spin_unlock(&fc->lock);
224 224
225 wake_up_interruptible_sync(&ff->poll_wait); 225 wake_up_interruptible_all(&ff->poll_wait);
226 226
227 inarg->fh = ff->fh; 227 inarg->fh = ff->fh;
228 inarg->flags = flags; 228 inarg->flags = flags;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d4286947bc2c..b788becada76 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,7 +272,6 @@ struct fuse_req {
272 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
273 struct fuse_init_out init_out; 273 struct fuse_init_out init_out;
274 struct cuse_init_in cuse_init_in; 274 struct cuse_init_in cuse_init_in;
275 struct cuse_init_out cuse_init_out;
276 struct { 275 struct {
277 struct fuse_read_in in; 276 struct fuse_read_in in;
278 u64 attr_ver; 277 u64 attr_ver;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 051b1a084528..cc6ec4b2f0ff 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -870,7 +870,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
870 870
871 fc->bdi.name = "fuse"; 871 fc->bdi.name = "fuse";
872 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 872 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
873 fc->bdi.unplug_io_fn = default_unplug_io_fn;
874 /* fuse does it's own writeback accounting */ 873 /* fuse does it's own writeback accounting */
875 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; 874 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
876 875
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 06c48a891832..8f26d1a58912 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -74,7 +74,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
74 return -EINVAL; 74 return -EINVAL;
75 if (S_ISLNK(inode->i_mode)) 75 if (S_ISLNK(inode->i_mode))
76 return -EOPNOTSUPP; 76 return -EOPNOTSUPP;
77 if (!is_owner_or_cap(inode)) 77 if (!inode_owner_or_capable(inode))
78 return -EPERM; 78 return -EPERM;
79 if (value) { 79 if (value) {
80 acl = posix_acl_from_xattr(value, size); 80 acl = posix_acl_from_xattr(value, size);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 21f7e46da4c0..f3d23ef4e876 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS := -I$(src) 1ccflags-y := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o inode.o log.o lops.o main.o meta_io.o \
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index aad77e4f61b5..c71995b111bf 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1117,7 +1117,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
1117 .writepages = gfs2_writeback_writepages, 1117 .writepages = gfs2_writeback_writepages,
1118 .readpage = gfs2_readpage, 1118 .readpage = gfs2_readpage,
1119 .readpages = gfs2_readpages, 1119 .readpages = gfs2_readpages,
1120 .sync_page = block_sync_page,
1121 .write_begin = gfs2_write_begin, 1120 .write_begin = gfs2_write_begin,
1122 .write_end = gfs2_write_end, 1121 .write_end = gfs2_write_end,
1123 .bmap = gfs2_bmap, 1122 .bmap = gfs2_bmap,
@@ -1133,7 +1132,6 @@ static const struct address_space_operations gfs2_ordered_aops = {
1133 .writepage = gfs2_ordered_writepage, 1132 .writepage = gfs2_ordered_writepage,
1134 .readpage = gfs2_readpage, 1133 .readpage = gfs2_readpage,
1135 .readpages = gfs2_readpages, 1134 .readpages = gfs2_readpages,
1136 .sync_page = block_sync_page,
1137 .write_begin = gfs2_write_begin, 1135 .write_begin = gfs2_write_begin,
1138 .write_end = gfs2_write_end, 1136 .write_end = gfs2_write_end,
1139 .set_page_dirty = gfs2_set_page_dirty, 1137 .set_page_dirty = gfs2_set_page_dirty,
@@ -1151,7 +1149,6 @@ static const struct address_space_operations gfs2_jdata_aops = {
1151 .writepages = gfs2_jdata_writepages, 1149 .writepages = gfs2_jdata_writepages,
1152 .readpage = gfs2_readpage, 1150 .readpage = gfs2_readpage,
1153 .readpages = gfs2_readpages, 1151 .readpages = gfs2_readpages,
1154 .sync_page = block_sync_page,
1155 .write_begin = gfs2_write_begin, 1152 .write_begin = gfs2_write_begin,
1156 .write_end = gfs2_write_end, 1153 .write_end = gfs2_write_end,
1157 .set_page_dirty = gfs2_set_page_dirty, 1154 .set_page_dirty = gfs2_set_page_dirty,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4074b952b059..b2682e073eee 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -221,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
221 goto out_drop_write; 221 goto out_drop_write;
222 222
223 error = -EACCES; 223 error = -EACCES;
224 if (!is_owner_or_cap(inode)) 224 if (!inode_owner_or_capable(inode))
225 goto out; 225 goto out;
226 226
227 error = 0; 227 error = 0;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e7ed31f858dd..5b102c1887fd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -121,7 +121,7 @@ __acquires(&sdp->sd_ail_lock)
121 lock_buffer(bh); 121 lock_buffer(bh);
122 if (test_clear_buffer_dirty(bh)) { 122 if (test_clear_buffer_dirty(bh)) {
123 bh->b_end_io = end_buffer_write_sync; 123 bh->b_end_io = end_buffer_write_sync;
124 submit_bh(WRITE_SYNC_PLUG, bh); 124 submit_bh(WRITE_SYNC, bh);
125 } else { 125 } else {
126 unlock_buffer(bh); 126 unlock_buffer(bh);
127 brelse(bh); 127 brelse(bh);
@@ -647,7 +647,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
647 lock_buffer(bh); 647 lock_buffer(bh);
648 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { 648 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
649 bh->b_end_io = end_buffer_write_sync; 649 bh->b_end_io = end_buffer_write_sync;
650 submit_bh(WRITE_SYNC_PLUG, bh); 650 submit_bh(WRITE_SYNC, bh);
651 } else { 651 } else {
652 unlock_buffer(bh); 652 unlock_buffer(bh);
653 brelse(bh); 653 brelse(bh);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index e919abf25ecd..51d27f00ebb4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -204,7 +204,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
204 } 204 }
205 205
206 gfs2_log_unlock(sdp); 206 gfs2_log_unlock(sdp);
207 submit_bh(WRITE_SYNC_PLUG, bh); 207 submit_bh(WRITE_SYNC, bh);
208 gfs2_log_lock(sdp); 208 gfs2_log_lock(sdp);
209 209
210 n = 0; 210 n = 0;
@@ -214,7 +214,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
214 gfs2_log_unlock(sdp); 214 gfs2_log_unlock(sdp);
215 lock_buffer(bd2->bd_bh); 215 lock_buffer(bd2->bd_bh);
216 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 216 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
217 submit_bh(WRITE_SYNC_PLUG, bh); 217 submit_bh(WRITE_SYNC, bh);
218 gfs2_log_lock(sdp); 218 gfs2_log_lock(sdp);
219 if (++n >= num) 219 if (++n >= num)
220 break; 220 break;
@@ -356,7 +356,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
356 sdp->sd_log_num_revoke--; 356 sdp->sd_log_num_revoke--;
357 357
358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
359 submit_bh(WRITE_SYNC_PLUG, bh); 359 submit_bh(WRITE_SYNC, bh);
360 360
361 bh = gfs2_log_get_buf(sdp); 361 bh = gfs2_log_get_buf(sdp);
362 mh = (struct gfs2_meta_header *)bh->b_data; 362 mh = (struct gfs2_meta_header *)bh->b_data;
@@ -373,7 +373,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
373 } 373 }
374 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 374 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
375 375
376 submit_bh(WRITE_SYNC_PLUG, bh); 376 submit_bh(WRITE_SYNC, bh);
377} 377}
378 378
379static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 379static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -575,7 +575,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
575 ptr = bh_log_ptr(bh); 575 ptr = bh_log_ptr(bh);
576 576
577 get_bh(bh); 577 get_bh(bh);
578 submit_bh(WRITE_SYNC_PLUG, bh); 578 submit_bh(WRITE_SYNC, bh);
579 gfs2_log_lock(sdp); 579 gfs2_log_lock(sdp);
580 while(!list_empty(list)) { 580 while(!list_empty(list)) {
581 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); 581 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
@@ -601,7 +601,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
601 } else { 601 } else {
602 bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); 602 bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
603 } 603 }
604 submit_bh(WRITE_SYNC_PLUG, bh1); 604 submit_bh(WRITE_SYNC, bh1);
605 gfs2_log_lock(sdp); 605 gfs2_log_lock(sdp);
606 ptr += 2; 606 ptr += 2;
607 } 607 }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 01d97f486553..675349b5a133 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
37 struct buffer_head *bh, *head; 37 struct buffer_head *bh, *head;
38 int nr_underway = 0; 38 int nr_underway = 0;
39 int write_op = REQ_META | 39 int write_op = REQ_META |
40 (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE); 40 (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
41 41
42 BUG_ON(!PageLocked(page)); 42 BUG_ON(!PageLocked(page));
43 BUG_ON(!page_has_buffers(page)); 43 BUG_ON(!page_has_buffers(page));
@@ -94,7 +94,6 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
94const struct address_space_operations gfs2_meta_aops = { 94const struct address_space_operations gfs2_meta_aops = {
95 .writepage = gfs2_aspace_writepage, 95 .writepage = gfs2_aspace_writepage,
96 .releasepage = gfs2_releasepage, 96 .releasepage = gfs2_releasepage,
97 .sync_page = block_sync_page,
98}; 97};
99 98
100/** 99/**
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index dffb4e996643..fff16c968e67 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,6 @@ static int hfs_writepages(struct address_space *mapping,
150const struct address_space_operations hfs_btree_aops = { 150const struct address_space_operations hfs_btree_aops = {
151 .readpage = hfs_readpage, 151 .readpage = hfs_readpage,
152 .writepage = hfs_writepage, 152 .writepage = hfs_writepage,
153 .sync_page = block_sync_page,
154 .write_begin = hfs_write_begin, 153 .write_begin = hfs_write_begin,
155 .write_end = generic_write_end, 154 .write_end = generic_write_end,
156 .bmap = hfs_bmap, 155 .bmap = hfs_bmap,
@@ -160,7 +159,6 @@ const struct address_space_operations hfs_btree_aops = {
160const struct address_space_operations hfs_aops = { 159const struct address_space_operations hfs_aops = {
161 .readpage = hfs_readpage, 160 .readpage = hfs_readpage,
162 .writepage = hfs_writepage, 161 .writepage = hfs_writepage,
163 .sync_page = block_sync_page,
164 .write_begin = hfs_write_begin, 162 .write_begin = hfs_write_begin,
165 .write_end = generic_write_end, 163 .write_end = generic_write_end,
166 .bmap = hfs_bmap, 164 .bmap = hfs_bmap,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a8df651747f0..b248a6cfcad9 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -146,7 +146,6 @@ static int hfsplus_writepages(struct address_space *mapping,
146const struct address_space_operations hfsplus_btree_aops = { 146const struct address_space_operations hfsplus_btree_aops = {
147 .readpage = hfsplus_readpage, 147 .readpage = hfsplus_readpage,
148 .writepage = hfsplus_writepage, 148 .writepage = hfsplus_writepage,
149 .sync_page = block_sync_page,
150 .write_begin = hfsplus_write_begin, 149 .write_begin = hfsplus_write_begin,
151 .write_end = generic_write_end, 150 .write_end = generic_write_end,
152 .bmap = hfsplus_bmap, 151 .bmap = hfsplus_bmap,
@@ -156,7 +155,6 @@ const struct address_space_operations hfsplus_btree_aops = {
156const struct address_space_operations hfsplus_aops = { 155const struct address_space_operations hfsplus_aops = {
157 .readpage = hfsplus_readpage, 156 .readpage = hfsplus_readpage,
158 .writepage = hfsplus_writepage, 157 .writepage = hfsplus_writepage,
159 .sync_page = block_sync_page,
160 .write_begin = hfsplus_write_begin, 158 .write_begin = hfsplus_write_begin,
161 .write_end = generic_write_end, 159 .write_end = generic_write_end,
162 .bmap = hfsplus_bmap, 160 .bmap = hfsplus_bmap,
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 508ce662ce12..fbaa6690c8e0 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -47,7 +47,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
47 if (err) 47 if (err)
48 goto out; 48 goto out;
49 49
50 if (!is_owner_or_cap(inode)) { 50 if (!inode_owner_or_capable(inode)) {
51 err = -EACCES; 51 err = -EACCES;
52 goto out_drop_write; 52 goto out_drop_write;
53 } 53 }
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 2dbae20450f8..9b9eb6933e43 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -119,7 +119,6 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
119const struct address_space_operations hpfs_aops = { 119const struct address_space_operations hpfs_aops = {
120 .readpage = hpfs_readpage, 120 .readpage = hpfs_readpage,
121 .writepage = hpfs_writepage, 121 .writepage = hpfs_writepage,
122 .sync_page = block_sync_page,
123 .write_begin = hpfs_write_begin, 122 .write_begin = hpfs_write_begin,
124 .write_end = generic_write_end, 123 .write_end = generic_write_end,
125 .bmap = _hpfs_bmap 124 .bmap = _hpfs_bmap
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9885082b470f..b9eeb1cd03ff 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,8 +332,7 @@ static void truncate_huge_page(struct page *page)
332{ 332{
333 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 333 cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
334 ClearPageUptodate(page); 334 ClearPageUptodate(page);
335 remove_from_page_cache(page); 335 delete_from_page_cache(page);
336 put_page(page);
337} 336}
338 337
339static void truncate_hugepages(struct inode *inode, loff_t lstart) 338static void truncate_hugepages(struct inode *inode, loff_t lstart)
diff --git a/fs/inode.c b/fs/inode.c
index 9910c039f026..5f4e11aaeb5c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,39 @@
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/ima.h> 27#include <linux/ima.h>
28#include <linux/cred.h>
29#include "internal.h"
30
31/*
32 * inode locking rules.
33 *
34 * inode->i_lock protects:
35 * inode->i_state, inode->i_hash, __iget()
36 * inode_lru_lock protects:
37 * inode_lru, inode->i_lru
38 * inode_sb_list_lock protects:
39 * sb->s_inodes, inode->i_sb_list
40 * inode_wb_list_lock protects:
41 * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
42 * inode_hash_lock protects:
43 * inode_hashtable, inode->i_hash
44 *
45 * Lock ordering:
46 *
47 * inode_sb_list_lock
48 * inode->i_lock
49 * inode_lru_lock
50 *
51 * inode_wb_list_lock
52 * inode->i_lock
53 *
54 * inode_hash_lock
55 * inode_sb_list_lock
56 * inode->i_lock
57 *
58 * iunique_lock
59 * inode_hash_lock
60 */
28 61
29/* 62/*
30 * This is needed for the following functions: 63 * This is needed for the following functions:
@@ -59,6 +92,8 @@
59 92
60static unsigned int i_hash_mask __read_mostly; 93static unsigned int i_hash_mask __read_mostly;
61static unsigned int i_hash_shift __read_mostly; 94static unsigned int i_hash_shift __read_mostly;
95static struct hlist_head *inode_hashtable __read_mostly;
96static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
62 97
63/* 98/*
64 * Each inode can be on two separate lists. One is 99 * Each inode can be on two separate lists. One is
@@ -73,15 +108,10 @@ static unsigned int i_hash_shift __read_mostly;
73 */ 108 */
74 109
75static LIST_HEAD(inode_lru); 110static LIST_HEAD(inode_lru);
76static struct hlist_head *inode_hashtable __read_mostly; 111static DEFINE_SPINLOCK(inode_lru_lock);
77 112
78/* 113__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
79 * A simple spinlock to protect the list manipulations. 114__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
80 *
81 * NOTE! You also have to own the lock if you change
82 * the i_state of an inode while it is in use..
83 */
84DEFINE_SPINLOCK(inode_lock);
85 115
86/* 116/*
87 * iprune_sem provides exclusion between the icache shrinking and the 117 * iprune_sem provides exclusion between the icache shrinking and the
@@ -136,15 +166,6 @@ int proc_nr_inodes(ctl_table *table, int write,
136} 166}
137#endif 167#endif
138 168
139static void wake_up_inode(struct inode *inode)
140{
141 /*
142 * Prevent speculative execution through spin_unlock(&inode_lock);
143 */
144 smp_mb();
145 wake_up_bit(&inode->i_state, __I_NEW);
146}
147
148/** 169/**
149 * inode_init_always - perform inode structure intialisation 170 * inode_init_always - perform inode structure intialisation
150 * @sb: superblock inode belongs to 171 * @sb: superblock inode belongs to
@@ -335,7 +356,7 @@ static void init_once(void *foo)
335} 356}
336 357
337/* 358/*
338 * inode_lock must be held 359 * inode->i_lock must be held
339 */ 360 */
340void __iget(struct inode *inode) 361void __iget(struct inode *inode)
341{ 362{
@@ -353,23 +374,22 @@ EXPORT_SYMBOL(ihold);
353 374
354static void inode_lru_list_add(struct inode *inode) 375static void inode_lru_list_add(struct inode *inode)
355{ 376{
377 spin_lock(&inode_lru_lock);
356 if (list_empty(&inode->i_lru)) { 378 if (list_empty(&inode->i_lru)) {
357 list_add(&inode->i_lru, &inode_lru); 379 list_add(&inode->i_lru, &inode_lru);
358 inodes_stat.nr_unused++; 380 inodes_stat.nr_unused++;
359 } 381 }
382 spin_unlock(&inode_lru_lock);
360} 383}
361 384
362static void inode_lru_list_del(struct inode *inode) 385static void inode_lru_list_del(struct inode *inode)
363{ 386{
387 spin_lock(&inode_lru_lock);
364 if (!list_empty(&inode->i_lru)) { 388 if (!list_empty(&inode->i_lru)) {
365 list_del_init(&inode->i_lru); 389 list_del_init(&inode->i_lru);
366 inodes_stat.nr_unused--; 390 inodes_stat.nr_unused--;
367 } 391 }
368} 392 spin_unlock(&inode_lru_lock);
369
370static inline void __inode_sb_list_add(struct inode *inode)
371{
372 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
373} 393}
374 394
375/** 395/**
@@ -378,15 +398,17 @@ static inline void __inode_sb_list_add(struct inode *inode)
378 */ 398 */
379void inode_sb_list_add(struct inode *inode) 399void inode_sb_list_add(struct inode *inode)
380{ 400{
381 spin_lock(&inode_lock); 401 spin_lock(&inode_sb_list_lock);
382 __inode_sb_list_add(inode); 402 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
383 spin_unlock(&inode_lock); 403 spin_unlock(&inode_sb_list_lock);
384} 404}
385EXPORT_SYMBOL_GPL(inode_sb_list_add); 405EXPORT_SYMBOL_GPL(inode_sb_list_add);
386 406
387static inline void __inode_sb_list_del(struct inode *inode) 407static inline void inode_sb_list_del(struct inode *inode)
388{ 408{
409 spin_lock(&inode_sb_list_lock);
389 list_del_init(&inode->i_sb_list); 410 list_del_init(&inode->i_sb_list);
411 spin_unlock(&inode_sb_list_lock);
390} 412}
391 413
392static unsigned long hash(struct super_block *sb, unsigned long hashval) 414static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -411,24 +433,15 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
411{ 433{
412 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); 434 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
413 435
414 spin_lock(&inode_lock); 436 spin_lock(&inode_hash_lock);
437 spin_lock(&inode->i_lock);
415 hlist_add_head(&inode->i_hash, b); 438 hlist_add_head(&inode->i_hash, b);
416 spin_unlock(&inode_lock); 439 spin_unlock(&inode->i_lock);
440 spin_unlock(&inode_hash_lock);
417} 441}
418EXPORT_SYMBOL(__insert_inode_hash); 442EXPORT_SYMBOL(__insert_inode_hash);
419 443
420/** 444/**
421 * __remove_inode_hash - remove an inode from the hash
422 * @inode: inode to unhash
423 *
424 * Remove an inode from the superblock.
425 */
426static void __remove_inode_hash(struct inode *inode)
427{
428 hlist_del_init(&inode->i_hash);
429}
430
431/**
432 * remove_inode_hash - remove an inode from the hash 445 * remove_inode_hash - remove an inode from the hash
433 * @inode: inode to unhash 446 * @inode: inode to unhash
434 * 447 *
@@ -436,9 +449,11 @@ static void __remove_inode_hash(struct inode *inode)
436 */ 449 */
437void remove_inode_hash(struct inode *inode) 450void remove_inode_hash(struct inode *inode)
438{ 451{
439 spin_lock(&inode_lock); 452 spin_lock(&inode_hash_lock);
453 spin_lock(&inode->i_lock);
440 hlist_del_init(&inode->i_hash); 454 hlist_del_init(&inode->i_hash);
441 spin_unlock(&inode_lock); 455 spin_unlock(&inode->i_lock);
456 spin_unlock(&inode_hash_lock);
442} 457}
443EXPORT_SYMBOL(remove_inode_hash); 458EXPORT_SYMBOL(remove_inode_hash);
444 459
@@ -455,10 +470,29 @@ void end_writeback(struct inode *inode)
455} 470}
456EXPORT_SYMBOL(end_writeback); 471EXPORT_SYMBOL(end_writeback);
457 472
473/*
474 * Free the inode passed in, removing it from the lists it is still connected
475 * to. We remove any pages still attached to the inode and wait for any IO that
476 * is still in progress before finally destroying the inode.
477 *
478 * An inode must already be marked I_FREEING so that we avoid the inode being
479 * moved back onto lists if we race with other code that manipulates the lists
480 * (e.g. writeback_single_inode). The caller is responsible for setting this.
481 *
482 * An inode must already be removed from the LRU list before being evicted from
483 * the cache. This should occur atomically with setting the I_FREEING state
484 * flag, so no inodes here should ever be on the LRU when being evicted.
485 */
458static void evict(struct inode *inode) 486static void evict(struct inode *inode)
459{ 487{
460 const struct super_operations *op = inode->i_sb->s_op; 488 const struct super_operations *op = inode->i_sb->s_op;
461 489
490 BUG_ON(!(inode->i_state & I_FREEING));
491 BUG_ON(!list_empty(&inode->i_lru));
492
493 inode_wb_list_del(inode);
494 inode_sb_list_del(inode);
495
462 if (op->evict_inode) { 496 if (op->evict_inode) {
463 op->evict_inode(inode); 497 op->evict_inode(inode);
464 } else { 498 } else {
@@ -470,6 +504,15 @@ static void evict(struct inode *inode)
470 bd_forget(inode); 504 bd_forget(inode);
471 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 505 if (S_ISCHR(inode->i_mode) && inode->i_cdev)
472 cd_forget(inode); 506 cd_forget(inode);
507
508 remove_inode_hash(inode);
509
510 spin_lock(&inode->i_lock);
511 wake_up_bit(&inode->i_state, __I_NEW);
512 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
513 spin_unlock(&inode->i_lock);
514
515 destroy_inode(inode);
473} 516}
474 517
475/* 518/*
@@ -488,14 +531,6 @@ static void dispose_list(struct list_head *head)
488 list_del_init(&inode->i_lru); 531 list_del_init(&inode->i_lru);
489 532
490 evict(inode); 533 evict(inode);
491
492 spin_lock(&inode_lock);
493 __remove_inode_hash(inode);
494 __inode_sb_list_del(inode);
495 spin_unlock(&inode_lock);
496
497 wake_up_inode(inode);
498 destroy_inode(inode);
499 } 534 }
500} 535}
501 536
@@ -513,25 +548,23 @@ void evict_inodes(struct super_block *sb)
513 struct inode *inode, *next; 548 struct inode *inode, *next;
514 LIST_HEAD(dispose); 549 LIST_HEAD(dispose);
515 550
516 spin_lock(&inode_lock); 551 spin_lock(&inode_sb_list_lock);
517 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 552 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
518 if (atomic_read(&inode->i_count)) 553 if (atomic_read(&inode->i_count))
519 continue; 554 continue;
520 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 555
556 spin_lock(&inode->i_lock);
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
558 spin_unlock(&inode->i_lock);
521 continue; 559 continue;
560 }
522 561
523 inode->i_state |= I_FREEING; 562 inode->i_state |= I_FREEING;
524 563 inode_lru_list_del(inode);
525 /* 564 spin_unlock(&inode->i_lock);
526 * Move the inode off the IO lists and LRU once I_FREEING is 565 list_add(&inode->i_lru, &dispose);
527 * set so that it won't get moved back on there if it is dirty.
528 */
529 list_move(&inode->i_lru, &dispose);
530 list_del_init(&inode->i_wb_list);
531 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
532 inodes_stat.nr_unused--;
533 } 566 }
534 spin_unlock(&inode_lock); 567 spin_unlock(&inode_sb_list_lock);
535 568
536 dispose_list(&dispose); 569 dispose_list(&dispose);
537 570
@@ -560,31 +593,30 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
560 struct inode *inode, *next; 593 struct inode *inode, *next;
561 LIST_HEAD(dispose); 594 LIST_HEAD(dispose);
562 595
563 spin_lock(&inode_lock); 596 spin_lock(&inode_sb_list_lock);
564 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 597 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
565 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 598 spin_lock(&inode->i_lock);
599 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
600 spin_unlock(&inode->i_lock);
566 continue; 601 continue;
602 }
567 if (inode->i_state & I_DIRTY && !kill_dirty) { 603 if (inode->i_state & I_DIRTY && !kill_dirty) {
604 spin_unlock(&inode->i_lock);
568 busy = 1; 605 busy = 1;
569 continue; 606 continue;
570 } 607 }
571 if (atomic_read(&inode->i_count)) { 608 if (atomic_read(&inode->i_count)) {
609 spin_unlock(&inode->i_lock);
572 busy = 1; 610 busy = 1;
573 continue; 611 continue;
574 } 612 }
575 613
576 inode->i_state |= I_FREEING; 614 inode->i_state |= I_FREEING;
577 615 inode_lru_list_del(inode);
578 /* 616 spin_unlock(&inode->i_lock);
579 * Move the inode off the IO lists and LRU once I_FREEING is 617 list_add(&inode->i_lru, &dispose);
580 * set so that it won't get moved back on there if it is dirty.
581 */
582 list_move(&inode->i_lru, &dispose);
583 list_del_init(&inode->i_wb_list);
584 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
585 inodes_stat.nr_unused--;
586 } 618 }
587 spin_unlock(&inode_lock); 619 spin_unlock(&inode_sb_list_lock);
588 620
589 dispose_list(&dispose); 621 dispose_list(&dispose);
590 622
@@ -606,7 +638,7 @@ static int can_unuse(struct inode *inode)
606 638
607/* 639/*
608 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a 640 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
609 * temporary list and then are freed outside inode_lock by dispose_list(). 641 * temporary list and then are freed outside inode_lru_lock by dispose_list().
610 * 642 *
611 * Any inodes which are pinned purely because of attached pagecache have their 643 * Any inodes which are pinned purely because of attached pagecache have their
612 * pagecache removed. If the inode has metadata buffers attached to 644 * pagecache removed. If the inode has metadata buffers attached to
@@ -627,7 +659,7 @@ static void prune_icache(int nr_to_scan)
627 unsigned long reap = 0; 659 unsigned long reap = 0;
628 660
629 down_read(&iprune_sem); 661 down_read(&iprune_sem);
630 spin_lock(&inode_lock); 662 spin_lock(&inode_lru_lock);
631 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 663 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
632 struct inode *inode; 664 struct inode *inode;
633 665
@@ -637,53 +669,67 @@ static void prune_icache(int nr_to_scan)
637 inode = list_entry(inode_lru.prev, struct inode, i_lru); 669 inode = list_entry(inode_lru.prev, struct inode, i_lru);
638 670
639 /* 671 /*
672 * we are inverting the inode_lru_lock/inode->i_lock here,
673 * so use a trylock. If we fail to get the lock, just move the
674 * inode to the back of the list so we don't spin on it.
675 */
676 if (!spin_trylock(&inode->i_lock)) {
677 list_move(&inode->i_lru, &inode_lru);
678 continue;
679 }
680
681 /*
640 * Referenced or dirty inodes are still in use. Give them 682 * Referenced or dirty inodes are still in use. Give them
641 * another pass through the LRU as we canot reclaim them now. 683 * another pass through the LRU as we canot reclaim them now.
642 */ 684 */
643 if (atomic_read(&inode->i_count) || 685 if (atomic_read(&inode->i_count) ||
644 (inode->i_state & ~I_REFERENCED)) { 686 (inode->i_state & ~I_REFERENCED)) {
645 list_del_init(&inode->i_lru); 687 list_del_init(&inode->i_lru);
688 spin_unlock(&inode->i_lock);
646 inodes_stat.nr_unused--; 689 inodes_stat.nr_unused--;
647 continue; 690 continue;
648 } 691 }
649 692
650 /* recently referenced inodes get one more pass */ 693 /* recently referenced inodes get one more pass */
651 if (inode->i_state & I_REFERENCED) { 694 if (inode->i_state & I_REFERENCED) {
652 list_move(&inode->i_lru, &inode_lru);
653 inode->i_state &= ~I_REFERENCED; 695 inode->i_state &= ~I_REFERENCED;
696 list_move(&inode->i_lru, &inode_lru);
697 spin_unlock(&inode->i_lock);
654 continue; 698 continue;
655 } 699 }
656 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 700 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
657 __iget(inode); 701 __iget(inode);
658 spin_unlock(&inode_lock); 702 spin_unlock(&inode->i_lock);
703 spin_unlock(&inode_lru_lock);
659 if (remove_inode_buffers(inode)) 704 if (remove_inode_buffers(inode))
660 reap += invalidate_mapping_pages(&inode->i_data, 705 reap += invalidate_mapping_pages(&inode->i_data,
661 0, -1); 706 0, -1);
662 iput(inode); 707 iput(inode);
663 spin_lock(&inode_lock); 708 spin_lock(&inode_lru_lock);
664 709
665 if (inode != list_entry(inode_lru.next, 710 if (inode != list_entry(inode_lru.next,
666 struct inode, i_lru)) 711 struct inode, i_lru))
667 continue; /* wrong inode or list_empty */ 712 continue; /* wrong inode or list_empty */
668 if (!can_unuse(inode)) 713 /* avoid lock inversions with trylock */
714 if (!spin_trylock(&inode->i_lock))
669 continue; 715 continue;
716 if (!can_unuse(inode)) {
717 spin_unlock(&inode->i_lock);
718 continue;
719 }
670 } 720 }
671 WARN_ON(inode->i_state & I_NEW); 721 WARN_ON(inode->i_state & I_NEW);
672 inode->i_state |= I_FREEING; 722 inode->i_state |= I_FREEING;
723 spin_unlock(&inode->i_lock);
673 724
674 /*
675 * Move the inode off the IO lists and LRU once I_FREEING is
676 * set so that it won't get moved back on there if it is dirty.
677 */
678 list_move(&inode->i_lru, &freeable); 725 list_move(&inode->i_lru, &freeable);
679 list_del_init(&inode->i_wb_list);
680 inodes_stat.nr_unused--; 726 inodes_stat.nr_unused--;
681 } 727 }
682 if (current_is_kswapd()) 728 if (current_is_kswapd())
683 __count_vm_events(KSWAPD_INODESTEAL, reap); 729 __count_vm_events(KSWAPD_INODESTEAL, reap);
684 else 730 else
685 __count_vm_events(PGINODESTEAL, reap); 731 __count_vm_events(PGINODESTEAL, reap);
686 spin_unlock(&inode_lock); 732 spin_unlock(&inode_lru_lock);
687 733
688 dispose_list(&freeable); 734 dispose_list(&freeable);
689 up_read(&iprune_sem); 735 up_read(&iprune_sem);
@@ -732,15 +778,21 @@ static struct inode *find_inode(struct super_block *sb,
732 778
733repeat: 779repeat:
734 hlist_for_each_entry(inode, node, head, i_hash) { 780 hlist_for_each_entry(inode, node, head, i_hash) {
735 if (inode->i_sb != sb) 781 spin_lock(&inode->i_lock);
782 if (inode->i_sb != sb) {
783 spin_unlock(&inode->i_lock);
736 continue; 784 continue;
737 if (!test(inode, data)) 785 }
786 if (!test(inode, data)) {
787 spin_unlock(&inode->i_lock);
738 continue; 788 continue;
789 }
739 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 790 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
740 __wait_on_freeing_inode(inode); 791 __wait_on_freeing_inode(inode);
741 goto repeat; 792 goto repeat;
742 } 793 }
743 __iget(inode); 794 __iget(inode);
795 spin_unlock(&inode->i_lock);
744 return inode; 796 return inode;
745 } 797 }
746 return NULL; 798 return NULL;
@@ -758,15 +810,21 @@ static struct inode *find_inode_fast(struct super_block *sb,
758 810
759repeat: 811repeat:
760 hlist_for_each_entry(inode, node, head, i_hash) { 812 hlist_for_each_entry(inode, node, head, i_hash) {
761 if (inode->i_ino != ino) 813 spin_lock(&inode->i_lock);
814 if (inode->i_ino != ino) {
815 spin_unlock(&inode->i_lock);
762 continue; 816 continue;
763 if (inode->i_sb != sb) 817 }
818 if (inode->i_sb != sb) {
819 spin_unlock(&inode->i_lock);
764 continue; 820 continue;
821 }
765 if (inode->i_state & (I_FREEING|I_WILL_FREE)) { 822 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
766 __wait_on_freeing_inode(inode); 823 __wait_on_freeing_inode(inode);
767 goto repeat; 824 goto repeat;
768 } 825 }
769 __iget(inode); 826 __iget(inode);
827 spin_unlock(&inode->i_lock);
770 return inode; 828 return inode;
771 } 829 }
772 return NULL; 830 return NULL;
@@ -826,19 +884,26 @@ struct inode *new_inode(struct super_block *sb)
826{ 884{
827 struct inode *inode; 885 struct inode *inode;
828 886
829 spin_lock_prefetch(&inode_lock); 887 spin_lock_prefetch(&inode_sb_list_lock);
830 888
831 inode = alloc_inode(sb); 889 inode = alloc_inode(sb);
832 if (inode) { 890 if (inode) {
833 spin_lock(&inode_lock); 891 spin_lock(&inode->i_lock);
834 __inode_sb_list_add(inode);
835 inode->i_state = 0; 892 inode->i_state = 0;
836 spin_unlock(&inode_lock); 893 spin_unlock(&inode->i_lock);
894 inode_sb_list_add(inode);
837 } 895 }
838 return inode; 896 return inode;
839} 897}
840EXPORT_SYMBOL(new_inode); 898EXPORT_SYMBOL(new_inode);
841 899
900/**
901 * unlock_new_inode - clear the I_NEW state and wake up any waiters
902 * @inode: new inode to unlock
903 *
904 * Called when the inode is fully initialised to clear the new state of the
905 * inode and wake up anyone waiting for the inode to finish initialisation.
906 */
842void unlock_new_inode(struct inode *inode) 907void unlock_new_inode(struct inode *inode)
843{ 908{
844#ifdef CONFIG_DEBUG_LOCK_ALLOC 909#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -858,51 +923,67 @@ void unlock_new_inode(struct inode *inode)
858 } 923 }
859 } 924 }
860#endif 925#endif
861 /* 926 spin_lock(&inode->i_lock);
862 * This is special! We do not need the spinlock when clearing I_NEW,
863 * because we're guaranteed that nobody else tries to do anything about
864 * the state of the inode when it is locked, as we just created it (so
865 * there can be no old holders that haven't tested I_NEW).
866 * However we must emit the memory barrier so that other CPUs reliably
867 * see the clearing of I_NEW after the other inode initialisation has
868 * completed.
869 */
870 smp_mb();
871 WARN_ON(!(inode->i_state & I_NEW)); 927 WARN_ON(!(inode->i_state & I_NEW));
872 inode->i_state &= ~I_NEW; 928 inode->i_state &= ~I_NEW;
873 wake_up_inode(inode); 929 wake_up_bit(&inode->i_state, __I_NEW);
930 spin_unlock(&inode->i_lock);
874} 931}
875EXPORT_SYMBOL(unlock_new_inode); 932EXPORT_SYMBOL(unlock_new_inode);
876 933
877/* 934/**
878 * This is called without the inode lock held.. Be careful. 935 * iget5_locked - obtain an inode from a mounted file system
936 * @sb: super block of file system
937 * @hashval: hash value (usually inode number) to get
938 * @test: callback used for comparisons between inodes
939 * @set: callback used to initialize a new struct inode
940 * @data: opaque data pointer to pass to @test and @set
941 *
942 * Search for the inode specified by @hashval and @data in the inode cache,
943 * and if present it is return it with an increased reference count. This is
944 * a generalized version of iget_locked() for file systems where the inode
945 * number is not sufficient for unique identification of an inode.
946 *
947 * If the inode is not in cache, allocate a new inode and return it locked,
948 * hashed, and with the I_NEW flag set. The file system gets to fill it in
949 * before unlocking it via unlock_new_inode().
879 * 950 *
880 * We no longer cache the sb_flags in i_flags - see fs.h 951 * Note both @test and @set are called with the inode_hash_lock held, so can't
881 * -- rmk@arm.uk.linux.org 952 * sleep.
882 */ 953 */
883static struct inode *get_new_inode(struct super_block *sb, 954struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
884 struct hlist_head *head, 955 int (*test)(struct inode *, void *),
885 int (*test)(struct inode *, void *), 956 int (*set)(struct inode *, void *), void *data)
886 int (*set)(struct inode *, void *),
887 void *data)
888{ 957{
958 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
889 struct inode *inode; 959 struct inode *inode;
890 960
961 spin_lock(&inode_hash_lock);
962 inode = find_inode(sb, head, test, data);
963 spin_unlock(&inode_hash_lock);
964
965 if (inode) {
966 wait_on_inode(inode);
967 return inode;
968 }
969
891 inode = alloc_inode(sb); 970 inode = alloc_inode(sb);
892 if (inode) { 971 if (inode) {
893 struct inode *old; 972 struct inode *old;
894 973
895 spin_lock(&inode_lock); 974 spin_lock(&inode_hash_lock);
896 /* We released the lock, so.. */ 975 /* We released the lock, so.. */
897 old = find_inode(sb, head, test, data); 976 old = find_inode(sb, head, test, data);
898 if (!old) { 977 if (!old) {
899 if (set(inode, data)) 978 if (set(inode, data))
900 goto set_failed; 979 goto set_failed;
901 980
902 hlist_add_head(&inode->i_hash, head); 981 spin_lock(&inode->i_lock);
903 __inode_sb_list_add(inode);
904 inode->i_state = I_NEW; 982 inode->i_state = I_NEW;
905 spin_unlock(&inode_lock); 983 hlist_add_head(&inode->i_hash, head);
984 spin_unlock(&inode->i_lock);
985 inode_sb_list_add(inode);
986 spin_unlock(&inode_hash_lock);
906 987
907 /* Return the locked inode with I_NEW set, the 988 /* Return the locked inode with I_NEW set, the
908 * caller is responsible for filling in the contents 989 * caller is responsible for filling in the contents
@@ -915,7 +996,7 @@ static struct inode *get_new_inode(struct super_block *sb,
915 * us. Use the old inode instead of the one we just 996 * us. Use the old inode instead of the one we just
916 * allocated. 997 * allocated.
917 */ 998 */
918 spin_unlock(&inode_lock); 999 spin_unlock(&inode_hash_lock);
919 destroy_inode(inode); 1000 destroy_inode(inode);
920 inode = old; 1001 inode = old;
921 wait_on_inode(inode); 1002 wait_on_inode(inode);
@@ -923,33 +1004,53 @@ static struct inode *get_new_inode(struct super_block *sb,
923 return inode; 1004 return inode;
924 1005
925set_failed: 1006set_failed:
926 spin_unlock(&inode_lock); 1007 spin_unlock(&inode_hash_lock);
927 destroy_inode(inode); 1008 destroy_inode(inode);
928 return NULL; 1009 return NULL;
929} 1010}
1011EXPORT_SYMBOL(iget5_locked);
930 1012
931/* 1013/**
932 * get_new_inode_fast is the fast path version of get_new_inode, see the 1014 * iget_locked - obtain an inode from a mounted file system
933 * comment at iget_locked for details. 1015 * @sb: super block of file system
1016 * @ino: inode number to get
1017 *
1018 * Search for the inode specified by @ino in the inode cache and if present
1019 * return it with an increased reference count. This is for file systems
1020 * where the inode number is sufficient for unique identification of an inode.
1021 *
1022 * If the inode is not in cache, allocate a new inode and return it locked,
1023 * hashed, and with the I_NEW flag set. The file system gets to fill it in
1024 * before unlocking it via unlock_new_inode().
934 */ 1025 */
935static struct inode *get_new_inode_fast(struct super_block *sb, 1026struct inode *iget_locked(struct super_block *sb, unsigned long ino)
936 struct hlist_head *head, unsigned long ino)
937{ 1027{
1028 struct hlist_head *head = inode_hashtable + hash(sb, ino);
938 struct inode *inode; 1029 struct inode *inode;
939 1030
1031 spin_lock(&inode_hash_lock);
1032 inode = find_inode_fast(sb, head, ino);
1033 spin_unlock(&inode_hash_lock);
1034 if (inode) {
1035 wait_on_inode(inode);
1036 return inode;
1037 }
1038
940 inode = alloc_inode(sb); 1039 inode = alloc_inode(sb);
941 if (inode) { 1040 if (inode) {
942 struct inode *old; 1041 struct inode *old;
943 1042
944 spin_lock(&inode_lock); 1043 spin_lock(&inode_hash_lock);
945 /* We released the lock, so.. */ 1044 /* We released the lock, so.. */
946 old = find_inode_fast(sb, head, ino); 1045 old = find_inode_fast(sb, head, ino);
947 if (!old) { 1046 if (!old) {
948 inode->i_ino = ino; 1047 inode->i_ino = ino;
949 hlist_add_head(&inode->i_hash, head); 1048 spin_lock(&inode->i_lock);
950 __inode_sb_list_add(inode);
951 inode->i_state = I_NEW; 1049 inode->i_state = I_NEW;
952 spin_unlock(&inode_lock); 1050 hlist_add_head(&inode->i_hash, head);
1051 spin_unlock(&inode->i_lock);
1052 inode_sb_list_add(inode);
1053 spin_unlock(&inode_hash_lock);
953 1054
954 /* Return the locked inode with I_NEW set, the 1055 /* Return the locked inode with I_NEW set, the
955 * caller is responsible for filling in the contents 1056 * caller is responsible for filling in the contents
@@ -962,13 +1063,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
962 * us. Use the old inode instead of the one we just 1063 * us. Use the old inode instead of the one we just
963 * allocated. 1064 * allocated.
964 */ 1065 */
965 spin_unlock(&inode_lock); 1066 spin_unlock(&inode_hash_lock);
966 destroy_inode(inode); 1067 destroy_inode(inode);
967 inode = old; 1068 inode = old;
968 wait_on_inode(inode); 1069 wait_on_inode(inode);
969 } 1070 }
970 return inode; 1071 return inode;
971} 1072}
1073EXPORT_SYMBOL(iget_locked);
972 1074
973/* 1075/*
974 * search the inode cache for a matching inode number. 1076 * search the inode cache for a matching inode number.
@@ -983,10 +1085,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino)
983 struct hlist_node *node; 1085 struct hlist_node *node;
984 struct inode *inode; 1086 struct inode *inode;
985 1087
1088 spin_lock(&inode_hash_lock);
986 hlist_for_each_entry(inode, node, b, i_hash) { 1089 hlist_for_each_entry(inode, node, b, i_hash) {
987 if (inode->i_ino == ino && inode->i_sb == sb) 1090 if (inode->i_ino == ino && inode->i_sb == sb) {
1091 spin_unlock(&inode_hash_lock);
988 return 0; 1092 return 0;
1093 }
989 } 1094 }
1095 spin_unlock(&inode_hash_lock);
990 1096
991 return 1; 1097 return 1;
992} 1098}
@@ -1016,7 +1122,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
1016 static unsigned int counter; 1122 static unsigned int counter;
1017 ino_t res; 1123 ino_t res;
1018 1124
1019 spin_lock(&inode_lock);
1020 spin_lock(&iunique_lock); 1125 spin_lock(&iunique_lock);
1021 do { 1126 do {
1022 if (counter <= max_reserved) 1127 if (counter <= max_reserved)
@@ -1024,7 +1129,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
1024 res = counter++; 1129 res = counter++;
1025 } while (!test_inode_iunique(sb, res)); 1130 } while (!test_inode_iunique(sb, res));
1026 spin_unlock(&iunique_lock); 1131 spin_unlock(&iunique_lock);
1027 spin_unlock(&inode_lock);
1028 1132
1029 return res; 1133 return res;
1030} 1134}
@@ -1032,116 +1136,50 @@ EXPORT_SYMBOL(iunique);
1032 1136
1033struct inode *igrab(struct inode *inode) 1137struct inode *igrab(struct inode *inode)
1034{ 1138{
1035 spin_lock(&inode_lock); 1139 spin_lock(&inode->i_lock);
1036 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) 1140 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
1037 __iget(inode); 1141 __iget(inode);
1038 else 1142 spin_unlock(&inode->i_lock);
1143 } else {
1144 spin_unlock(&inode->i_lock);
1039 /* 1145 /*
1040 * Handle the case where s_op->clear_inode is not been 1146 * Handle the case where s_op->clear_inode is not been
1041 * called yet, and somebody is calling igrab 1147 * called yet, and somebody is calling igrab
1042 * while the inode is getting freed. 1148 * while the inode is getting freed.
1043 */ 1149 */
1044 inode = NULL; 1150 inode = NULL;
1045 spin_unlock(&inode_lock); 1151 }
1046 return inode; 1152 return inode;
1047} 1153}
1048EXPORT_SYMBOL(igrab); 1154EXPORT_SYMBOL(igrab);
1049 1155
1050/** 1156/**
1051 * ifind - internal function, you want ilookup5() or iget5().
1052 * @sb: super block of file system to search
1053 * @head: the head of the list to search
1054 * @test: callback used for comparisons between inodes
1055 * @data: opaque data pointer to pass to @test
1056 * @wait: if true wait for the inode to be unlocked, if false do not
1057 *
1058 * ifind() searches for the inode specified by @data in the inode
1059 * cache. This is a generalized version of ifind_fast() for file systems where
1060 * the inode number is not sufficient for unique identification of an inode.
1061 *
1062 * If the inode is in the cache, the inode is returned with an incremented
1063 * reference count.
1064 *
1065 * Otherwise NULL is returned.
1066 *
1067 * Note, @test is called with the inode_lock held, so can't sleep.
1068 */
1069static struct inode *ifind(struct super_block *sb,
1070 struct hlist_head *head, int (*test)(struct inode *, void *),
1071 void *data, const int wait)
1072{
1073 struct inode *inode;
1074
1075 spin_lock(&inode_lock);
1076 inode = find_inode(sb, head, test, data);
1077 if (inode) {
1078 spin_unlock(&inode_lock);
1079 if (likely(wait))
1080 wait_on_inode(inode);
1081 return inode;
1082 }
1083 spin_unlock(&inode_lock);
1084 return NULL;
1085}
1086
1087/**
1088 * ifind_fast - internal function, you want ilookup() or iget().
1089 * @sb: super block of file system to search
1090 * @head: head of the list to search
1091 * @ino: inode number to search for
1092 *
1093 * ifind_fast() searches for the inode @ino in the inode cache. This is for
1094 * file systems where the inode number is sufficient for unique identification
1095 * of an inode.
1096 *
1097 * If the inode is in the cache, the inode is returned with an incremented
1098 * reference count.
1099 *
1100 * Otherwise NULL is returned.
1101 */
1102static struct inode *ifind_fast(struct super_block *sb,
1103 struct hlist_head *head, unsigned long ino)
1104{
1105 struct inode *inode;
1106
1107 spin_lock(&inode_lock);
1108 inode = find_inode_fast(sb, head, ino);
1109 if (inode) {
1110 spin_unlock(&inode_lock);
1111 wait_on_inode(inode);
1112 return inode;
1113 }
1114 spin_unlock(&inode_lock);
1115 return NULL;
1116}
1117
1118/**
1119 * ilookup5_nowait - search for an inode in the inode cache 1157 * ilookup5_nowait - search for an inode in the inode cache
1120 * @sb: super block of file system to search 1158 * @sb: super block of file system to search
1121 * @hashval: hash value (usually inode number) to search for 1159 * @hashval: hash value (usually inode number) to search for
1122 * @test: callback used for comparisons between inodes 1160 * @test: callback used for comparisons between inodes
1123 * @data: opaque data pointer to pass to @test 1161 * @data: opaque data pointer to pass to @test
1124 * 1162 *
1125 * ilookup5() uses ifind() to search for the inode specified by @hashval and 1163 * Search for the inode specified by @hashval and @data in the inode cache.
1126 * @data in the inode cache. This is a generalized version of ilookup() for
1127 * file systems where the inode number is not sufficient for unique
1128 * identification of an inode.
1129 *
1130 * If the inode is in the cache, the inode is returned with an incremented 1164 * If the inode is in the cache, the inode is returned with an incremented
1131 * reference count. Note, the inode lock is not waited upon so you have to be 1165 * reference count.
1132 * very careful what you do with the returned inode. You probably should be
1133 * using ilookup5() instead.
1134 * 1166 *
1135 * Otherwise NULL is returned. 1167 * Note: I_NEW is not waited upon so you have to be very careful what you do
1168 * with the returned inode. You probably should be using ilookup5() instead.
1136 * 1169 *
1137 * Note, @test is called with the inode_lock held, so can't sleep. 1170 * Note2: @test is called with the inode_hash_lock held, so can't sleep.
1138 */ 1171 */
1139struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 1172struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
1140 int (*test)(struct inode *, void *), void *data) 1173 int (*test)(struct inode *, void *), void *data)
1141{ 1174{
1142 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1175 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1176 struct inode *inode;
1177
1178 spin_lock(&inode_hash_lock);
1179 inode = find_inode(sb, head, test, data);
1180 spin_unlock(&inode_hash_lock);
1143 1181
1144 return ifind(sb, head, test, data, 0); 1182 return inode;
1145} 1183}
1146EXPORT_SYMBOL(ilookup5_nowait); 1184EXPORT_SYMBOL(ilookup5_nowait);
1147 1185
@@ -1152,24 +1190,24 @@ EXPORT_SYMBOL(ilookup5_nowait);
1152 * @test: callback used for comparisons between inodes 1190 * @test: callback used for comparisons between inodes
1153 * @data: opaque data pointer to pass to @test 1191 * @data: opaque data pointer to pass to @test
1154 * 1192 *
1155 * ilookup5() uses ifind() to search for the inode specified by @hashval and 1193 * Search for the inode specified by @hashval and @data in the inode cache,
1156 * @data in the inode cache. This is a generalized version of ilookup() for 1194 * and if the inode is in the cache, return the inode with an incremented
1157 * file systems where the inode number is not sufficient for unique 1195 * reference count. Waits on I_NEW before returning the inode.
1158 * identification of an inode.
1159 *
1160 * If the inode is in the cache, the inode lock is waited upon and the inode is
1161 * returned with an incremented reference count. 1196 * returned with an incremented reference count.
1162 * 1197 *
1163 * Otherwise NULL is returned. 1198 * This is a generalized version of ilookup() for file systems where the
1199 * inode number is not sufficient for unique identification of an inode.
1164 * 1200 *
1165 * Note, @test is called with the inode_lock held, so can't sleep. 1201 * Note: @test is called with the inode_hash_lock held, so can't sleep.
1166 */ 1202 */
1167struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 1203struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
1168 int (*test)(struct inode *, void *), void *data) 1204 int (*test)(struct inode *, void *), void *data)
1169{ 1205{
1170 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1206 struct inode *inode = ilookup5_nowait(sb, hashval, test, data);
1171 1207
1172 return ifind(sb, head, test, data, 1); 1208 if (inode)
1209 wait_on_inode(inode);
1210 return inode;
1173} 1211}
1174EXPORT_SYMBOL(ilookup5); 1212EXPORT_SYMBOL(ilookup5);
1175 1213
@@ -1178,91 +1216,23 @@ EXPORT_SYMBOL(ilookup5);
1178 * @sb: super block of file system to search 1216 * @sb: super block of file system to search
1179 * @ino: inode number to search for 1217 * @ino: inode number to search for
1180 * 1218 *
1181 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. 1219 * Search for the inode @ino in the inode cache, and if the inode is in the
1182 * This is for file systems where the inode number is sufficient for unique 1220 * cache, the inode is returned with an incremented reference count.
1183 * identification of an inode.
1184 *
1185 * If the inode is in the cache, the inode is returned with an incremented
1186 * reference count.
1187 *
1188 * Otherwise NULL is returned.
1189 */ 1221 */
1190struct inode *ilookup(struct super_block *sb, unsigned long ino) 1222struct inode *ilookup(struct super_block *sb, unsigned long ino)
1191{ 1223{
1192 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1224 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1193
1194 return ifind_fast(sb, head, ino);
1195}
1196EXPORT_SYMBOL(ilookup);
1197
1198/**
1199 * iget5_locked - obtain an inode from a mounted file system
1200 * @sb: super block of file system
1201 * @hashval: hash value (usually inode number) to get
1202 * @test: callback used for comparisons between inodes
1203 * @set: callback used to initialize a new struct inode
1204 * @data: opaque data pointer to pass to @test and @set
1205 *
1206 * iget5_locked() uses ifind() to search for the inode specified by @hashval
1207 * and @data in the inode cache and if present it is returned with an increased
1208 * reference count. This is a generalized version of iget_locked() for file
1209 * systems where the inode number is not sufficient for unique identification
1210 * of an inode.
1211 *
1212 * If the inode is not in cache, get_new_inode() is called to allocate a new
1213 * inode and this is returned locked, hashed, and with the I_NEW flag set. The
1214 * file system gets to fill it in before unlocking it via unlock_new_inode().
1215 *
1216 * Note both @test and @set are called with the inode_lock held, so can't sleep.
1217 */
1218struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
1219 int (*test)(struct inode *, void *),
1220 int (*set)(struct inode *, void *), void *data)
1221{
1222 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1223 struct inode *inode; 1225 struct inode *inode;
1224 1226
1225 inode = ifind(sb, head, test, data, 1); 1227 spin_lock(&inode_hash_lock);
1226 if (inode) 1228 inode = find_inode_fast(sb, head, ino);
1227 return inode; 1229 spin_unlock(&inode_hash_lock);
1228 /*
1229 * get_new_inode() will do the right thing, re-trying the search
1230 * in case it had to block at any point.
1231 */
1232 return get_new_inode(sb, head, test, set, data);
1233}
1234EXPORT_SYMBOL(iget5_locked);
1235
1236/**
1237 * iget_locked - obtain an inode from a mounted file system
1238 * @sb: super block of file system
1239 * @ino: inode number to get
1240 *
1241 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
1242 * the inode cache and if present it is returned with an increased reference
1243 * count. This is for file systems where the inode number is sufficient for
1244 * unique identification of an inode.
1245 *
1246 * If the inode is not in cache, get_new_inode_fast() is called to allocate a
1247 * new inode and this is returned locked, hashed, and with the I_NEW flag set.
1248 * The file system gets to fill it in before unlocking it via
1249 * unlock_new_inode().
1250 */
1251struct inode *iget_locked(struct super_block *sb, unsigned long ino)
1252{
1253 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1254 struct inode *inode;
1255 1230
1256 inode = ifind_fast(sb, head, ino);
1257 if (inode) 1231 if (inode)
1258 return inode; 1232 wait_on_inode(inode);
1259 /* 1233 return inode;
1260 * get_new_inode_fast() will do the right thing, re-trying the search
1261 * in case it had to block at any point.
1262 */
1263 return get_new_inode_fast(sb, head, ino);
1264} 1234}
1265EXPORT_SYMBOL(iget_locked); 1235EXPORT_SYMBOL(ilookup);
1266 1236
1267int insert_inode_locked(struct inode *inode) 1237int insert_inode_locked(struct inode *inode)
1268{ 1238{
@@ -1270,27 +1240,33 @@ int insert_inode_locked(struct inode *inode)
1270 ino_t ino = inode->i_ino; 1240 ino_t ino = inode->i_ino;
1271 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1241 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1272 1242
1273 inode->i_state |= I_NEW;
1274 while (1) { 1243 while (1) {
1275 struct hlist_node *node; 1244 struct hlist_node *node;
1276 struct inode *old = NULL; 1245 struct inode *old = NULL;
1277 spin_lock(&inode_lock); 1246 spin_lock(&inode_hash_lock);
1278 hlist_for_each_entry(old, node, head, i_hash) { 1247 hlist_for_each_entry(old, node, head, i_hash) {
1279 if (old->i_ino != ino) 1248 if (old->i_ino != ino)
1280 continue; 1249 continue;
1281 if (old->i_sb != sb) 1250 if (old->i_sb != sb)
1282 continue; 1251 continue;
1283 if (old->i_state & (I_FREEING|I_WILL_FREE)) 1252 spin_lock(&old->i_lock);
1253 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1254 spin_unlock(&old->i_lock);
1284 continue; 1255 continue;
1256 }
1285 break; 1257 break;
1286 } 1258 }
1287 if (likely(!node)) { 1259 if (likely(!node)) {
1260 spin_lock(&inode->i_lock);
1261 inode->i_state |= I_NEW;
1288 hlist_add_head(&inode->i_hash, head); 1262 hlist_add_head(&inode->i_hash, head);
1289 spin_unlock(&inode_lock); 1263 spin_unlock(&inode->i_lock);
1264 spin_unlock(&inode_hash_lock);
1290 return 0; 1265 return 0;
1291 } 1266 }
1292 __iget(old); 1267 __iget(old);
1293 spin_unlock(&inode_lock); 1268 spin_unlock(&old->i_lock);
1269 spin_unlock(&inode_hash_lock);
1294 wait_on_inode(old); 1270 wait_on_inode(old);
1295 if (unlikely(!inode_unhashed(old))) { 1271 if (unlikely(!inode_unhashed(old))) {
1296 iput(old); 1272 iput(old);
@@ -1307,29 +1283,34 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1307 struct super_block *sb = inode->i_sb; 1283 struct super_block *sb = inode->i_sb;
1308 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1284 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1309 1285
1310 inode->i_state |= I_NEW;
1311
1312 while (1) { 1286 while (1) {
1313 struct hlist_node *node; 1287 struct hlist_node *node;
1314 struct inode *old = NULL; 1288 struct inode *old = NULL;
1315 1289
1316 spin_lock(&inode_lock); 1290 spin_lock(&inode_hash_lock);
1317 hlist_for_each_entry(old, node, head, i_hash) { 1291 hlist_for_each_entry(old, node, head, i_hash) {
1318 if (old->i_sb != sb) 1292 if (old->i_sb != sb)
1319 continue; 1293 continue;
1320 if (!test(old, data)) 1294 if (!test(old, data))
1321 continue; 1295 continue;
1322 if (old->i_state & (I_FREEING|I_WILL_FREE)) 1296 spin_lock(&old->i_lock);
1297 if (old->i_state & (I_FREEING|I_WILL_FREE)) {
1298 spin_unlock(&old->i_lock);
1323 continue; 1299 continue;
1300 }
1324 break; 1301 break;
1325 } 1302 }
1326 if (likely(!node)) { 1303 if (likely(!node)) {
1304 spin_lock(&inode->i_lock);
1305 inode->i_state |= I_NEW;
1327 hlist_add_head(&inode->i_hash, head); 1306 hlist_add_head(&inode->i_hash, head);
1328 spin_unlock(&inode_lock); 1307 spin_unlock(&inode->i_lock);
1308 spin_unlock(&inode_hash_lock);
1329 return 0; 1309 return 0;
1330 } 1310 }
1331 __iget(old); 1311 __iget(old);
1332 spin_unlock(&inode_lock); 1312 spin_unlock(&old->i_lock);
1313 spin_unlock(&inode_hash_lock);
1333 wait_on_inode(old); 1314 wait_on_inode(old);
1334 if (unlikely(!inode_unhashed(old))) { 1315 if (unlikely(!inode_unhashed(old))) {
1335 iput(old); 1316 iput(old);
@@ -1374,47 +1355,35 @@ static void iput_final(struct inode *inode)
1374 const struct super_operations *op = inode->i_sb->s_op; 1355 const struct super_operations *op = inode->i_sb->s_op;
1375 int drop; 1356 int drop;
1376 1357
1358 WARN_ON(inode->i_state & I_NEW);
1359
1377 if (op && op->drop_inode) 1360 if (op && op->drop_inode)
1378 drop = op->drop_inode(inode); 1361 drop = op->drop_inode(inode);
1379 else 1362 else
1380 drop = generic_drop_inode(inode); 1363 drop = generic_drop_inode(inode);
1381 1364
1365 if (!drop && (sb->s_flags & MS_ACTIVE)) {
1366 inode->i_state |= I_REFERENCED;
1367 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1368 inode_lru_list_add(inode);
1369 spin_unlock(&inode->i_lock);
1370 return;
1371 }
1372
1382 if (!drop) { 1373 if (!drop) {
1383 if (sb->s_flags & MS_ACTIVE) {
1384 inode->i_state |= I_REFERENCED;
1385 if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
1386 inode_lru_list_add(inode);
1387 }
1388 spin_unlock(&inode_lock);
1389 return;
1390 }
1391 WARN_ON(inode->i_state & I_NEW);
1392 inode->i_state |= I_WILL_FREE; 1374 inode->i_state |= I_WILL_FREE;
1393 spin_unlock(&inode_lock); 1375 spin_unlock(&inode->i_lock);
1394 write_inode_now(inode, 1); 1376 write_inode_now(inode, 1);
1395 spin_lock(&inode_lock); 1377 spin_lock(&inode->i_lock);
1396 WARN_ON(inode->i_state & I_NEW); 1378 WARN_ON(inode->i_state & I_NEW);
1397 inode->i_state &= ~I_WILL_FREE; 1379 inode->i_state &= ~I_WILL_FREE;
1398 __remove_inode_hash(inode);
1399 } 1380 }
1400 1381
1401 WARN_ON(inode->i_state & I_NEW);
1402 inode->i_state |= I_FREEING; 1382 inode->i_state |= I_FREEING;
1403
1404 /*
1405 * Move the inode off the IO lists and LRU once I_FREEING is
1406 * set so that it won't get moved back on there if it is dirty.
1407 */
1408 inode_lru_list_del(inode); 1383 inode_lru_list_del(inode);
1409 list_del_init(&inode->i_wb_list); 1384 spin_unlock(&inode->i_lock);
1410 1385
1411 __inode_sb_list_del(inode);
1412 spin_unlock(&inode_lock);
1413 evict(inode); 1386 evict(inode);
1414 remove_inode_hash(inode);
1415 wake_up_inode(inode);
1416 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
1417 destroy_inode(inode);
1418} 1387}
1419 1388
1420/** 1389/**
@@ -1431,7 +1400,7 @@ void iput(struct inode *inode)
1431 if (inode) { 1400 if (inode) {
1432 BUG_ON(inode->i_state & I_CLEAR); 1401 BUG_ON(inode->i_state & I_CLEAR);
1433 1402
1434 if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1403 if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
1435 iput_final(inode); 1404 iput_final(inode);
1436 } 1405 }
1437} 1406}
@@ -1610,9 +1579,8 @@ EXPORT_SYMBOL(inode_wait);
1610 * to recheck inode state. 1579 * to recheck inode state.
1611 * 1580 *
1612 * It doesn't matter if I_NEW is not set initially, a call to 1581 * It doesn't matter if I_NEW is not set initially, a call to
1613 * wake_up_inode() after removing from the hash list will DTRT. 1582 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
1614 * 1583 * will DTRT.
1615 * This is called with inode_lock held.
1616 */ 1584 */
1617static void __wait_on_freeing_inode(struct inode *inode) 1585static void __wait_on_freeing_inode(struct inode *inode)
1618{ 1586{
@@ -1620,10 +1588,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
1620 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1588 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1621 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1589 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1622 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1590 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1623 spin_unlock(&inode_lock); 1591 spin_unlock(&inode->i_lock);
1592 spin_unlock(&inode_hash_lock);
1624 schedule(); 1593 schedule();
1625 finish_wait(wq, &wait.wait); 1594 finish_wait(wq, &wait.wait);
1626 spin_lock(&inode_lock); 1595 spin_lock(&inode_hash_lock);
1627} 1596}
1628 1597
1629static __initdata unsigned long ihash_entries; 1598static __initdata unsigned long ihash_entries;
@@ -1715,7 +1684,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1715EXPORT_SYMBOL(init_special_inode); 1684EXPORT_SYMBOL(init_special_inode);
1716 1685
1717/** 1686/**
1718 * Init uid,gid,mode for new inode according to posix standards 1687 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
1719 * @inode: New inode 1688 * @inode: New inode
1720 * @dir: Directory inode 1689 * @dir: Directory inode
1721 * @mode: mode of the new inode 1690 * @mode: mode of the new inode
@@ -1733,3 +1702,22 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
1733 inode->i_mode = mode; 1702 inode->i_mode = mode;
1734} 1703}
1735EXPORT_SYMBOL(inode_init_owner); 1704EXPORT_SYMBOL(inode_init_owner);
1705
1706/**
1707 * inode_owner_or_capable - check current task permissions to inode
1708 * @inode: inode being checked
1709 *
1710 * Return true if current either has CAP_FOWNER to the inode, or
1711 * owns the file.
1712 */
1713bool inode_owner_or_capable(const struct inode *inode)
1714{
1715 struct user_namespace *ns = inode_userns(inode);
1716
1717 if (current_user_ns() == ns && current_fsuid() == inode->i_uid)
1718 return true;
1719 if (ns_capable(ns, CAP_FOWNER))
1720 return true;
1721 return false;
1722}
1723EXPORT_SYMBOL(inode_owner_or_capable);
diff --git a/fs/internal.h b/fs/internal.h
index 17191546d527..b29c46e4e32f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -64,6 +64,7 @@ extern int copy_mount_string(const void __user *, char **);
64 64
65extern unsigned int mnt_get_count(struct vfsmount *mnt); 65extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern struct vfsmount *lookup_mnt(struct path *);
67extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
68 struct vfsmount *); 69 struct vfsmount *);
69extern void release_mounts(struct list_head *); 70extern void release_mounts(struct list_head *);
@@ -124,6 +125,13 @@ extern long do_handle_open(int mountdirfd,
124/* 125/*
125 * inode.c 126 * inode.c
126 */ 127 */
128extern spinlock_t inode_sb_list_lock;
129
130/*
131 * fs-writeback.c
132 */
133extern void inode_wb_list_del(struct inode *inode);
134
127extern int get_nr_dirty_inodes(void); 135extern int get_nr_dirty_inodes(void);
128extern void evict_inodes(struct super_block *); 136extern void evict_inodes(struct super_block *);
129extern int invalidate_inodes(struct super_block *, bool); 137extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1eebeb72b202..1d9b9fcb2db4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -548,6 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
548{ 548{
549 int error = 0; 549 int error = 0;
550 int __user *argp = (int __user *)arg; 550 int __user *argp = (int __user *)arg;
551 struct inode *inode = filp->f_path.dentry->d_inode;
551 552
552 switch (cmd) { 553 switch (cmd) {
553 case FIOCLEX: 554 case FIOCLEX:
@@ -567,13 +568,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
567 break; 568 break;
568 569
569 case FIOQSIZE: 570 case FIOQSIZE:
570 if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || 571 if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
571 S_ISREG(filp->f_path.dentry->d_inode->i_mode) || 572 S_ISLNK(inode->i_mode)) {
572 S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { 573 loff_t res = inode_get_bytes(inode);
573 loff_t res = 574 error = copy_to_user(argp, &res, sizeof(res)) ?
574 inode_get_bytes(filp->f_path.dentry->d_inode); 575 -EFAULT : 0;
575 error = copy_to_user((loff_t __user *)arg, &res,
576 sizeof(res)) ? -EFAULT : 0;
577 } else 576 } else
578 error = -ENOTTY; 577 error = -ENOTTY;
579 break; 578 break;
@@ -590,14 +589,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
590 return ioctl_fiemap(filp, arg); 589 return ioctl_fiemap(filp, arg);
591 590
592 case FIGETBSZ: 591 case FIGETBSZ:
593 { 592 return put_user(inode->i_sb->s_blocksize, argp);
594 struct inode *inode = filp->f_path.dentry->d_inode;
595 int __user *p = (int __user *)arg;
596 return put_user(inode->i_sb->s_blocksize, p);
597 }
598 593
599 default: 594 default:
600 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 595 if (S_ISREG(inode->i_mode))
601 error = file_ioctl(filp, cmd, arg); 596 error = file_ioctl(filp, cmd, arg);
602 else 597 else
603 error = vfs_ioctl(filp, cmd, arg); 598 error = vfs_ioctl(filp, cmd, arg);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a0f3833c0dbf..3db5ba4568fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1158,7 +1158,6 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
1158 1158
1159static const struct address_space_operations isofs_aops = { 1159static const struct address_space_operations isofs_aops = {
1160 .readpage = isofs_readpage, 1160 .readpage = isofs_readpage,
1161 .sync_page = block_sync_page,
1162 .bmap = _isofs_bmap 1161 .bmap = _isofs_bmap
1163}; 1162};
1164 1163
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 34a4861c14b8..da871ee084d3 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -20,6 +20,7 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/blkdev.h>
23 24
24/* 25/*
25 * Default IO end handler for temporary BJ_IO buffer_heads. 26 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -294,7 +295,7 @@ void journal_commit_transaction(journal_t *journal)
294 int first_tag = 0; 295 int first_tag = 0;
295 int tag_flag; 296 int tag_flag;
296 int i; 297 int i;
297 int write_op = WRITE_SYNC; 298 struct blk_plug plug;
298 299
299 /* 300 /*
300 * First job: lock down the current transaction and wait for 301 * First job: lock down the current transaction and wait for
@@ -327,13 +328,6 @@ void journal_commit_transaction(journal_t *journal)
327 spin_lock(&journal->j_state_lock); 328 spin_lock(&journal->j_state_lock);
328 commit_transaction->t_state = T_LOCKED; 329 commit_transaction->t_state = T_LOCKED;
329 330
330 /*
331 * Use plugged writes here, since we want to submit several before
332 * we unplug the device. We don't do explicit unplugging in here,
333 * instead we rely on sync_buffer() doing the unplug for us.
334 */
335 if (commit_transaction->t_synchronous_commit)
336 write_op = WRITE_SYNC_PLUG;
337 spin_lock(&commit_transaction->t_handle_lock); 331 spin_lock(&commit_transaction->t_handle_lock);
338 while (commit_transaction->t_updates) { 332 while (commit_transaction->t_updates) {
339 DEFINE_WAIT(wait); 333 DEFINE_WAIT(wait);
@@ -418,8 +412,10 @@ void journal_commit_transaction(journal_t *journal)
418 * Now start flushing things to disk, in the order they appear 412 * Now start flushing things to disk, in the order they appear
419 * on the transaction lists. Data blocks go first. 413 * on the transaction lists. Data blocks go first.
420 */ 414 */
415 blk_start_plug(&plug);
421 err = journal_submit_data_buffers(journal, commit_transaction, 416 err = journal_submit_data_buffers(journal, commit_transaction,
422 write_op); 417 WRITE_SYNC);
418 blk_finish_plug(&plug);
423 419
424 /* 420 /*
425 * Wait for all previously submitted IO to complete. 421 * Wait for all previously submitted IO to complete.
@@ -480,7 +476,9 @@ void journal_commit_transaction(journal_t *journal)
480 err = 0; 476 err = 0;
481 } 477 }
482 478
483 journal_write_revoke_records(journal, commit_transaction, write_op); 479 blk_start_plug(&plug);
480
481 journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC);
484 482
485 /* 483 /*
486 * If we found any dirty or locked buffers, then we should have 484 * If we found any dirty or locked buffers, then we should have
@@ -650,7 +648,7 @@ start_journal_io:
650 clear_buffer_dirty(bh); 648 clear_buffer_dirty(bh);
651 set_buffer_uptodate(bh); 649 set_buffer_uptodate(bh);
652 bh->b_end_io = journal_end_buffer_io_sync; 650 bh->b_end_io = journal_end_buffer_io_sync;
653 submit_bh(write_op, bh); 651 submit_bh(WRITE_SYNC, bh);
654 } 652 }
655 cond_resched(); 653 cond_resched();
656 654
@@ -661,6 +659,8 @@ start_journal_io:
661 } 659 }
662 } 660 }
663 661
662 blk_finish_plug(&plug);
663
664 /* Lo and behold: we have just managed to send a transaction to 664 /* Lo and behold: we have just managed to send a transaction to
665 the log. Before we can commit it, wait for the IO so far to 665 the log. Before we can commit it, wait for the IO so far to
666 complete. Control buffers being written are on the 666 complete. Control buffers being written are on the
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f3ad1598b201..fa36d7662b21 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -137,9 +137,9 @@ static int journal_submit_commit_record(journal_t *journal,
137 if (journal->j_flags & JBD2_BARRIER && 137 if (journal->j_flags & JBD2_BARRIER &&
138 !JBD2_HAS_INCOMPAT_FEATURE(journal, 138 !JBD2_HAS_INCOMPAT_FEATURE(journal,
139 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) 139 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
140 ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); 140 ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
141 else 141 else
142 ret = submit_bh(WRITE_SYNC_PLUG, bh); 142 ret = submit_bh(WRITE_SYNC, bh);
143 143
144 *cbh = bh; 144 *cbh = bh;
145 return ret; 145 return ret;
@@ -329,7 +329,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
329 int tag_bytes = journal_tag_bytes(journal); 329 int tag_bytes = journal_tag_bytes(journal);
330 struct buffer_head *cbh = NULL; /* For transactional checksums */ 330 struct buffer_head *cbh = NULL; /* For transactional checksums */
331 __u32 crc32_sum = ~0; 331 __u32 crc32_sum = ~0;
332 int write_op = WRITE_SYNC; 332 struct blk_plug plug;
333 333
334 /* 334 /*
335 * First job: lock down the current transaction and wait for 335 * First job: lock down the current transaction and wait for
@@ -363,13 +363,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
363 write_lock(&journal->j_state_lock); 363 write_lock(&journal->j_state_lock);
364 commit_transaction->t_state = T_LOCKED; 364 commit_transaction->t_state = T_LOCKED;
365 365
366 /*
367 * Use plugged writes here, since we want to submit several before
368 * we unplug the device. We don't do explicit unplugging in here,
369 * instead we rely on sync_buffer() doing the unplug for us.
370 */
371 if (commit_transaction->t_synchronous_commit)
372 write_op = WRITE_SYNC_PLUG;
373 trace_jbd2_commit_locking(journal, commit_transaction); 366 trace_jbd2_commit_locking(journal, commit_transaction);
374 stats.run.rs_wait = commit_transaction->t_max_wait; 367 stats.run.rs_wait = commit_transaction->t_max_wait;
375 stats.run.rs_locked = jiffies; 368 stats.run.rs_locked = jiffies;
@@ -469,8 +462,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
469 if (err) 462 if (err)
470 jbd2_journal_abort(journal, err); 463 jbd2_journal_abort(journal, err);
471 464
465 blk_start_plug(&plug);
472 jbd2_journal_write_revoke_records(journal, commit_transaction, 466 jbd2_journal_write_revoke_records(journal, commit_transaction,
473 write_op); 467 WRITE_SYNC);
468 blk_finish_plug(&plug);
474 469
475 jbd_debug(3, "JBD: commit phase 2\n"); 470 jbd_debug(3, "JBD: commit phase 2\n");
476 471
@@ -497,6 +492,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
497 err = 0; 492 err = 0;
498 descriptor = NULL; 493 descriptor = NULL;
499 bufs = 0; 494 bufs = 0;
495 blk_start_plug(&plug);
500 while (commit_transaction->t_buffers) { 496 while (commit_transaction->t_buffers) {
501 497
502 /* Find the next buffer to be journaled... */ 498 /* Find the next buffer to be journaled... */
@@ -658,7 +654,7 @@ start_journal_io:
658 clear_buffer_dirty(bh); 654 clear_buffer_dirty(bh);
659 set_buffer_uptodate(bh); 655 set_buffer_uptodate(bh);
660 bh->b_end_io = journal_end_buffer_io_sync; 656 bh->b_end_io = journal_end_buffer_io_sync;
661 submit_bh(write_op, bh); 657 submit_bh(WRITE_SYNC, bh);
662 } 658 }
663 cond_resched(); 659 cond_resched();
664 stats.run.rs_blocks_logged += bufs; 660 stats.run.rs_blocks_logged += bufs;
@@ -699,6 +695,8 @@ start_journal_io:
699 __jbd2_journal_abort_hard(journal); 695 __jbd2_journal_abort_hard(journal);
700 } 696 }
701 697
698 blk_finish_plug(&plug);
699
702 /* Lo and behold: we have just managed to send a transaction to 700 /* Lo and behold: we have just managed to send a transaction to
703 the log. Before we can commit it, wait for the IO so far to 701 the log. Before we can commit it, wait for the IO so far to
704 complete. Control buffers being written are on the 702 complete. Control buffers being written are on the
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 95b79672150a..828a0e1ea438 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -402,7 +402,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
402 402
403 if (name[0] != '\0') 403 if (name[0] != '\0')
404 return -EINVAL; 404 return -EINVAL;
405 if (!is_owner_or_cap(dentry->d_inode)) 405 if (!inode_owner_or_capable(dentry->d_inode))
406 return -EPERM; 406 return -EPERM;
407 407
408 if (value) { 408 if (value) {
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index fd05a0b9431d..5a001020c542 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -40,12 +40,13 @@ static z_stream inf_strm, def_strm;
40 40
41static int __init alloc_workspaces(void) 41static int __init alloc_workspaces(void)
42{ 42{
43 def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 43 def_strm.workspace = vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
44 MAX_MEM_LEVEL));
44 if (!def_strm.workspace) { 45 if (!def_strm.workspace) {
45 printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize()); 46 printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
46 return -ENOMEM; 47 return -ENOMEM;
47 } 48 }
48 D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize())); 49 D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL)));
49 inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 50 inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
50 if (!inf_strm.workspace) { 51 if (!inf_strm.workspace) {
51 printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize()); 52 printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize());
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4f9cc0482949..3e93cdd19005 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -31,7 +31,7 @@
31 * is used to release xattr name/value pair and detach from c->xattrindex. 31 * is used to release xattr name/value pair and detach from c->xattrindex.
32 * reclaim_xattr_datum(c) 32 * reclaim_xattr_datum(c)
33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when 33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
34 * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold 34 * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold
35 * is hard coded as 32KiB. 35 * is hard coded as 32KiB.
36 * do_verify_xattr_datum(c, xd) 36 * do_verify_xattr_datum(c, xd)
37 * is used to load the xdatum informations without name/value pair from the medium. 37 * is used to load the xdatum informations without name/value pair from the medium.
diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile
index 3adb6395e42d..a58fa72d7e59 100644
--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -13,4 +13,4 @@ jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
13 13
14jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o 14jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
15 15
16EXTRA_CFLAGS += -D_JFS_4K 16ccflags-y := -D_JFS_4K
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9978803ceedc..eddbb373209e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -352,7 +352,6 @@ const struct address_space_operations jfs_aops = {
352 .readpages = jfs_readpages, 352 .readpages = jfs_readpages,
353 .writepage = jfs_writepage, 353 .writepage = jfs_writepage,
354 .writepages = jfs_writepages, 354 .writepages = jfs_writepages,
355 .sync_page = block_sync_page,
356 .write_begin = jfs_write_begin, 355 .write_begin = jfs_write_begin,
357 .write_end = nobh_write_end, 356 .write_end = nobh_write_end,
358 .bmap = jfs_bmap, 357 .bmap = jfs_bmap,
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index afe222bf300f..6f98a1866776 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -72,7 +72,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
72 if (err) 72 if (err)
73 return err; 73 return err;
74 74
75 if (!is_owner_or_cap(inode)) { 75 if (!inode_owner_or_capable(inode)) {
76 err = -EACCES; 76 err = -EACCES;
77 goto setflags_out; 77 goto setflags_out;
78 } 78 }
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 48b44bd8267b..6740d34cd82b 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -583,7 +583,6 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset)
583const struct address_space_operations jfs_metapage_aops = { 583const struct address_space_operations jfs_metapage_aops = {
584 .readpage = metapage_readpage, 584 .readpage = metapage_readpage,
585 .writepage = metapage_writepage, 585 .writepage = metapage_writepage,
586 .sync_page = block_sync_page,
587 .releasepage = metapage_releasepage, 586 .releasepage = metapage_releasepage,
588 .invalidatepage = metapage_invalidatepage, 587 .invalidatepage = metapage_invalidatepage,
589 .set_page_dirty = __set_page_dirty_nobuffers, 588 .set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 3fa4c32272df..24838f1eeee5 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -678,7 +678,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
678 struct posix_acl *acl; 678 struct posix_acl *acl;
679 int rc; 679 int rc;
680 680
681 if (!is_owner_or_cap(inode)) 681 if (!inode_owner_or_capable(inode))
682 return -EPERM; 682 return -EPERM;
683 683
684 /* 684 /*
diff --git a/fs/locks.c b/fs/locks.c
index 822c3d1843af..0a4f50dfadfb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -414,17 +414,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
414 fl->fl_ops = NULL; 414 fl->fl_ops = NULL;
415 fl->fl_lmops = NULL; 415 fl->fl_lmops = NULL;
416 416
417 switch (l->l_type) { 417 return assign_type(fl, l->l_type);
418 case F_RDLCK:
419 case F_WRLCK:
420 case F_UNLCK:
421 fl->fl_type = l->l_type;
422 break;
423 default:
424 return -EINVAL;
425 }
426
427 return (0);
428} 418}
429#endif 419#endif
430 420
diff --git a/fs/logfs/compr.c b/fs/logfs/compr.c
index 44bbfd249abc..961f02b86d97 100644
--- a/fs/logfs/compr.c
+++ b/fs/logfs/compr.c
@@ -81,7 +81,7 @@ error:
81 81
82int __init logfs_compr_init(void) 82int __init logfs_compr_init(void)
83{ 83{
84 size_t size = max(zlib_deflate_workspacesize(), 84 size_t size = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
85 zlib_inflate_workspacesize()); 85 zlib_inflate_workspacesize());
86 stream.workspace = vmalloc(size); 86 stream.workspace = vmalloc(size);
87 if (!stream.workspace) 87 if (!stream.workspace)
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 723bc5bca09a..1adc8d455f0e 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -39,7 +39,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
39 bio.bi_end_io = request_complete; 39 bio.bi_end_io = request_complete;
40 40
41 submit_bio(rw, &bio); 41 submit_bio(rw, &bio);
42 generic_unplug_device(bdev_get_queue(bdev));
43 wait_for_completion(&complete); 42 wait_for_completion(&complete);
44 return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; 43 return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
45} 44}
@@ -168,7 +167,6 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
168 } 167 }
169 len = PAGE_ALIGN(len); 168 len = PAGE_ALIGN(len);
170 __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); 169 __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
171 generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
172} 170}
173 171
174 172
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index e86376b87af1..c2ad7028def4 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -196,7 +196,7 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
196 if (IS_RDONLY(inode)) 196 if (IS_RDONLY(inode))
197 return -EROFS; 197 return -EROFS;
198 198
199 if (!is_owner_or_cap(inode)) 199 if (!inode_owner_or_capable(inode))
200 return -EACCES; 200 return -EACCES;
201 201
202 err = get_user(flags, (int __user *)arg); 202 err = get_user(flags, (int __user *)arg);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 03b8c240aeda..edfea7a3a747 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
293 return ret; 293 return ret;
294} 294}
295 295
296/* called with inode_lock held */ 296/* called with inode->i_lock held */
297static int logfs_drop_inode(struct inode *inode) 297static int logfs_drop_inode(struct inode *inode)
298{ 298{
299 struct logfs_super *super = logfs_super(inode->i_sb); 299 struct logfs_super *super = logfs_super(inode->i_sb);
diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig
index 0fd7ca994264..6624684dd5de 100644
--- a/fs/minix/Kconfig
+++ b/fs/minix/Kconfig
@@ -15,3 +15,11 @@ config MINIX_FS
15 module will be called minix. Note that the file system of your root 15 module will be called minix. Note that the file system of your root
16 partition (the one containing the directory /) cannot be compiled as 16 partition (the one containing the directory /) cannot be compiled as
17 a module. 17 a module.
18
19config MINIX_FS_NATIVE_ENDIAN
20 def_bool MINIX_FS
21 depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
22
23config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED
24 def_bool MINIX_FS
25 depends on M68K && MMU
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index ae0b83f476a6..adcdc0a4e182 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -399,7 +399,6 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
399static const struct address_space_operations minix_aops = { 399static const struct address_space_operations minix_aops = {
400 .readpage = minix_readpage, 400 .readpage = minix_readpage,
401 .writepage = minix_writepage, 401 .writepage = minix_writepage,
402 .sync_page = block_sync_page,
403 .write_begin = minix_write_begin, 402 .write_begin = minix_write_begin,
404 .write_end = generic_write_end, 403 .write_end = generic_write_end,
405 .bmap = minix_bmap 404 .bmap = minix_bmap
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 407b1c84911e..341e2122879a 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -88,4 +88,78 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
88 return list_entry(inode, struct minix_inode_info, vfs_inode); 88 return list_entry(inode, struct minix_inode_info, vfs_inode);
89} 89}
90 90
91#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \
92 defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
93
94#error Minix file system byte order broken
95
96#elif defined(CONFIG_MINIX_FS_NATIVE_ENDIAN)
97
98/*
99 * big-endian 32 or 64 bit indexed bitmaps on big-endian system or
100 * little-endian bitmaps on little-endian system
101 */
102
103#define minix_test_and_set_bit(nr, addr) \
104 __test_and_set_bit((nr), (unsigned long *)(addr))
105#define minix_set_bit(nr, addr) \
106 __set_bit((nr), (unsigned long *)(addr))
107#define minix_test_and_clear_bit(nr, addr) \
108 __test_and_clear_bit((nr), (unsigned long *)(addr))
109#define minix_test_bit(nr, addr) \
110 test_bit((nr), (unsigned long *)(addr))
111#define minix_find_first_zero_bit(addr, size) \
112 find_first_zero_bit((unsigned long *)(addr), (size))
113
114#elif defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
115
116/*
117 * big-endian 16bit indexed bitmaps
118 */
119
120static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size)
121{
122 const unsigned short *p = vaddr, *addr = vaddr;
123 unsigned short num;
124
125 if (!size)
126 return 0;
127
128 size = (size >> 4) + ((size & 15) > 0);
129 while (*p++ == 0xffff) {
130 if (--size == 0)
131 return (p - addr) << 4;
132 }
133
134 num = *--p;
135 return ((p - addr) << 4) + ffz(num);
136}
137
138#define minix_test_and_set_bit(nr, addr) \
139 __test_and_set_bit((nr) ^ 16, (unsigned long *)(addr))
140#define minix_set_bit(nr, addr) \
141 __set_bit((nr) ^ 16, (unsigned long *)(addr))
142#define minix_test_and_clear_bit(nr, addr) \
143 __test_and_clear_bit((nr) ^ 16, (unsigned long *)(addr))
144
145static inline int minix_test_bit(int nr, const void *vaddr)
146{
147 const unsigned short *p = vaddr;
148 return (p[nr >> 4] & (1U << (nr & 15))) != 0;
149}
150
151#else
152
153/*
154 * little-endian bitmaps
155 */
156
157#define minix_test_and_set_bit __test_and_set_bit_le
158#define minix_set_bit __set_bit_le
159#define minix_test_and_clear_bit __test_and_clear_bit_le
160#define minix_test_bit test_bit_le
161#define minix_find_first_zero_bit find_first_zero_bit_le
162
163#endif
164
91#endif /* FS_MINIX_H */ 165#endif /* FS_MINIX_H */
diff --git a/fs/mpage.c b/fs/mpage.c
index d78455a81ec9..0afc809e46e0 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -364,6 +364,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
364 sector_t last_block_in_bio = 0; 364 sector_t last_block_in_bio = 0;
365 struct buffer_head map_bh; 365 struct buffer_head map_bh;
366 unsigned long first_logical_block = 0; 366 unsigned long first_logical_block = 0;
367 struct blk_plug plug;
368
369 blk_start_plug(&plug);
367 370
368 map_bh.b_state = 0; 371 map_bh.b_state = 0;
369 map_bh.b_size = 0; 372 map_bh.b_size = 0;
@@ -385,6 +388,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
385 BUG_ON(!list_empty(pages)); 388 BUG_ON(!list_empty(pages));
386 if (bio) 389 if (bio)
387 mpage_bio_submit(READ, bio); 390 mpage_bio_submit(READ, bio);
391 blk_finish_plug(&plug);
388 return 0; 392 return 0;
389} 393}
390EXPORT_SYMBOL(mpage_readpages); 394EXPORT_SYMBOL(mpage_readpages);
@@ -666,8 +670,11 @@ int
666mpage_writepages(struct address_space *mapping, 670mpage_writepages(struct address_space *mapping,
667 struct writeback_control *wbc, get_block_t get_block) 671 struct writeback_control *wbc, get_block_t get_block)
668{ 672{
673 struct blk_plug plug;
669 int ret; 674 int ret;
670 675
676 blk_start_plug(&plug);
677
671 if (!get_block) 678 if (!get_block)
672 ret = generic_writepages(mapping, wbc); 679 ret = generic_writepages(mapping, wbc);
673 else { 680 else {
@@ -682,6 +689,7 @@ mpage_writepages(struct address_space *mapping,
682 if (mpd.bio) 689 if (mpd.bio)
683 mpage_bio_submit(WRITE, mpd.bio); 690 mpage_bio_submit(WRITE, mpd.bio);
684 } 691 }
692 blk_finish_plug(&plug);
685 return ret; 693 return ret;
686} 694}
687EXPORT_SYMBOL(mpage_writepages); 695EXPORT_SYMBOL(mpage_writepages);
diff --git a/fs/namei.c b/fs/namei.c
index 5a9a6c3094da..3cb616d38d9c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -183,6 +183,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
183 183
184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
185 185
186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms;
188
186 if (current_fsuid() == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
187 mode >>= 6; 190 mode >>= 6;
188 else { 191 else {
@@ -196,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
196 mode >>= 3; 199 mode >>= 3;
197 } 200 }
198 201
202other_perms:
199 /* 203 /*
200 * If the DACs are ok we don't need any capability check. 204 * If the DACs are ok we don't need any capability check.
201 */ 205 */
@@ -237,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
237 * Executable DACs are overridable if at least one exec bit is set. 241 * Executable DACs are overridable if at least one exec bit is set.
238 */ 242 */
239 if (!(mask & MAY_EXEC) || execute_ok(inode)) 243 if (!(mask & MAY_EXEC) || execute_ok(inode))
240 if (capable(CAP_DAC_OVERRIDE)) 244 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
241 return 0; 245 return 0;
242 246
243 /* 247 /*
@@ -245,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
245 */ 249 */
246 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 250 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
247 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 251 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
248 if (capable(CAP_DAC_READ_SEARCH)) 252 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
249 return 0; 253 return 0;
250 254
251 return -EACCES; 255 return -EACCES;
@@ -654,6 +658,7 @@ static inline int handle_reval_path(struct nameidata *nd)
654static inline int exec_permission(struct inode *inode, unsigned int flags) 658static inline int exec_permission(struct inode *inode, unsigned int flags)
655{ 659{
656 int ret; 660 int ret;
661 struct user_namespace *ns = inode_userns(inode);
657 662
658 if (inode->i_op->permission) { 663 if (inode->i_op->permission) {
659 ret = inode->i_op->permission(inode, MAY_EXEC, flags); 664 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
@@ -666,7 +671,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags)
666 if (ret == -ECHILD) 671 if (ret == -ECHILD)
667 return ret; 672 return ret;
668 673
669 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 674 if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
675 ns_capable(ns, CAP_DAC_READ_SEARCH))
670 goto ok; 676 goto ok;
671 677
672 return ret; 678 return ret;
@@ -986,6 +992,12 @@ int follow_down_one(struct path *path)
986 return 0; 992 return 0;
987} 993}
988 994
995static inline bool managed_dentry_might_block(struct dentry *dentry)
996{
997 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
998 dentry->d_op->d_manage(dentry, true) < 0);
999}
1000
989/* 1001/*
990 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we 1002 * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
991 * meet a managed dentry and we're not walking to "..". True is returned to 1003 * meet a managed dentry and we're not walking to "..". True is returned to
@@ -994,19 +1006,26 @@ int follow_down_one(struct path *path)
994static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, 1006static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
995 struct inode **inode, bool reverse_transit) 1007 struct inode **inode, bool reverse_transit)
996{ 1008{
997 while (d_mountpoint(path->dentry)) { 1009 for (;;) {
998 struct vfsmount *mounted; 1010 struct vfsmount *mounted;
999 if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && 1011 /*
1000 !reverse_transit && 1012 * Don't forget we might have a non-mountpoint managed dentry
1001 path->dentry->d_op->d_manage(path->dentry, true) < 0) 1013 * that wants to block transit.
1014 */
1015 *inode = path->dentry->d_inode;
1016 if (!reverse_transit &&
1017 unlikely(managed_dentry_might_block(path->dentry)))
1002 return false; 1018 return false;
1019
1020 if (!d_mountpoint(path->dentry))
1021 break;
1022
1003 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 1023 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
1004 if (!mounted) 1024 if (!mounted)
1005 break; 1025 break;
1006 path->mnt = mounted; 1026 path->mnt = mounted;
1007 path->dentry = mounted->mnt_root; 1027 path->dentry = mounted->mnt_root;
1008 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 1028 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
1009 *inode = path->dentry->d_inode;
1010 } 1029 }
1011 1030
1012 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) 1031 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
@@ -1644,13 +1663,16 @@ static int path_lookupat(int dfd, const char *name,
1644 err = -ECHILD; 1663 err = -ECHILD;
1645 } 1664 }
1646 1665
1647 if (!err) 1666 if (!err) {
1648 err = handle_reval_path(nd); 1667 err = handle_reval_path(nd);
1668 if (err)
1669 path_put(&nd->path);
1670 }
1649 1671
1650 if (!err && nd->flags & LOOKUP_DIRECTORY) { 1672 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1651 if (!nd->inode->i_op->lookup) { 1673 if (!nd->inode->i_op->lookup) {
1652 path_put(&nd->path); 1674 path_put(&nd->path);
1653 return -ENOTDIR; 1675 err = -ENOTDIR;
1654 } 1676 }
1655 } 1677 }
1656 1678
@@ -1842,11 +1864,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
1842 1864
1843 if (!(dir->i_mode & S_ISVTX)) 1865 if (!(dir->i_mode & S_ISVTX))
1844 return 0; 1866 return 0;
1867 if (current_user_ns() != inode_userns(inode))
1868 goto other_userns;
1845 if (inode->i_uid == fsuid) 1869 if (inode->i_uid == fsuid)
1846 return 0; 1870 return 0;
1847 if (dir->i_uid == fsuid) 1871 if (dir->i_uid == fsuid)
1848 return 0; 1872 return 0;
1849 return !capable(CAP_FOWNER); 1873
1874other_userns:
1875 return !ns_capable(inode_userns(inode), CAP_FOWNER);
1850} 1876}
1851 1877
1852/* 1878/*
@@ -2026,7 +2052,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
2026 } 2052 }
2027 2053
2028 /* O_NOATIME can only be set by the owner or superuser */ 2054 /* O_NOATIME can only be set by the owner or superuser */
2029 if (flag & O_NOATIME && !is_owner_or_cap(inode)) 2055 if (flag & O_NOATIME && !inode_owner_or_capable(inode))
2030 return -EPERM; 2056 return -EPERM;
2031 2057
2032 /* 2058 /*
@@ -2440,7 +2466,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2440 if (error) 2466 if (error)
2441 return error; 2467 return error;
2442 2468
2443 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 2469 if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
2470 !ns_capable(inode_userns(dir), CAP_MKNOD))
2444 return -EPERM; 2471 return -EPERM;
2445 2472
2446 if (!dir->i_op->mknod) 2473 if (!dir->i_op->mknod)
diff --git a/fs/namespace.c b/fs/namespace.c
index 9263995bf6a1..7dba2ed03429 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2701,7 +2701,7 @@ void __init mnt_init(void)
2701 if (!mount_hashtable) 2701 if (!mount_hashtable)
2702 panic("Failed to allocate mount hash table\n"); 2702 panic("Failed to allocate mount hash table\n");
2703 2703
2704 printk("Mount-cache hash table entries: %lu\n", HASH_SIZE); 2704 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
2705 2705
2706 for (u = 0; u < HASH_SIZE; u++) 2706 for (u = 0; u < HASH_SIZE; u++)
2707 INIT_LIST_HEAD(&mount_hashtable[u]); 2707 INIT_LIST_HEAD(&mount_hashtable[u]);
diff --git a/fs/ncpfs/Makefile b/fs/ncpfs/Makefile
index 68ea095100a8..c66af563f2ce 100644
--- a/fs/ncpfs/Makefile
+++ b/fs/ncpfs/Makefile
@@ -11,6 +11,6 @@ ncpfs-$(CONFIG_NCPFS_EXTRAS) += symlink.o
11ncpfs-$(CONFIG_NCPFS_NFS_NS) += symlink.o 11ncpfs-$(CONFIG_NCPFS_NFS_NS) += symlink.o
12 12
13# If you want debugging output, please uncomment the following line 13# If you want debugging output, please uncomment the following line
14# EXTRA_CFLAGS += -DDEBUG_NCP=1 14# ccflags-y := -DDEBUG_NCP=1
15 15
16CFLAGS_ncplib_kernel.o := -finline-functions 16CFLAGS_ncplib_kernel.o := -finline-functions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index abdf38d5971d..7237672216c8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -44,6 +44,7 @@
44/* #define NFS_DEBUG_VERBOSE 1 */ 44/* #define NFS_DEBUG_VERBOSE 1 */
45 45
46static int nfs_opendir(struct inode *, struct file *); 46static int nfs_opendir(struct inode *, struct file *);
47static int nfs_closedir(struct inode *, struct file *);
47static int nfs_readdir(struct file *, void *, filldir_t); 48static int nfs_readdir(struct file *, void *, filldir_t);
48static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); 49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
49static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); 50static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
@@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
64 .read = generic_read_dir, 65 .read = generic_read_dir,
65 .readdir = nfs_readdir, 66 .readdir = nfs_readdir,
66 .open = nfs_opendir, 67 .open = nfs_opendir,
67 .release = nfs_release, 68 .release = nfs_closedir,
68 .fsync = nfs_fsync_dir, 69 .fsync = nfs_fsync_dir,
69}; 70};
70 71
@@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = {
133 134
134#endif /* CONFIG_NFS_V4 */ 135#endif /* CONFIG_NFS_V4 */
135 136
137static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred)
138{
139 struct nfs_open_dir_context *ctx;
140 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
141 if (ctx != NULL) {
142 ctx->duped = 0;
143 ctx->dir_cookie = 0;
144 ctx->dup_cookie = 0;
145 ctx->cred = get_rpccred(cred);
146 } else
147 ctx = ERR_PTR(-ENOMEM);
148 return ctx;
149}
150
151static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
152{
153 put_rpccred(ctx->cred);
154 kfree(ctx);
155}
156
136/* 157/*
137 * Open file 158 * Open file
138 */ 159 */
139static int 160static int
140nfs_opendir(struct inode *inode, struct file *filp) 161nfs_opendir(struct inode *inode, struct file *filp)
141{ 162{
142 int res; 163 int res = 0;
164 struct nfs_open_dir_context *ctx;
165 struct rpc_cred *cred;
143 166
144 dfprintk(FILE, "NFS: open dir(%s/%s)\n", 167 dfprintk(FILE, "NFS: open dir(%s/%s)\n",
145 filp->f_path.dentry->d_parent->d_name.name, 168 filp->f_path.dentry->d_parent->d_name.name,
@@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp)
147 170
148 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 171 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
149 172
150 /* Call generic open code in order to cache credentials */ 173 cred = rpc_lookup_cred();
151 res = nfs_open(inode, filp); 174 if (IS_ERR(cred))
175 return PTR_ERR(cred);
176 ctx = alloc_nfs_open_dir_context(cred);
177 if (IS_ERR(ctx)) {
178 res = PTR_ERR(ctx);
179 goto out;
180 }
181 filp->private_data = ctx;
152 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { 182 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
153 /* This is a mountpoint, so d_revalidate will never 183 /* This is a mountpoint, so d_revalidate will never
154 * have been called, so we need to refresh the 184 * have been called, so we need to refresh the
@@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp)
156 */ 186 */
157 __nfs_revalidate_inode(NFS_SERVER(inode), inode); 187 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
158 } 188 }
189out:
190 put_rpccred(cred);
159 return res; 191 return res;
160} 192}
161 193
194static int
195nfs_closedir(struct inode *inode, struct file *filp)
196{
197 put_nfs_open_dir_context(filp->private_data);
198 return 0;
199}
200
162struct nfs_cache_array_entry { 201struct nfs_cache_array_entry {
163 u64 cookie; 202 u64 cookie;
164 u64 ino; 203 u64 ino;
@@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
284{ 323{
285 loff_t diff = desc->file->f_pos - desc->current_index; 324 loff_t diff = desc->file->f_pos - desc->current_index;
286 unsigned int index; 325 unsigned int index;
326 struct nfs_open_dir_context *ctx = desc->file->private_data;
287 327
288 if (diff < 0) 328 if (diff < 0)
289 goto out_eof; 329 goto out_eof;
290 if (diff >= array->size) { 330 if (diff >= array->size) {
291 if (array->eof_index >= 0) 331 if (array->eof_index >= 0)
292 goto out_eof; 332 goto out_eof;
293 desc->current_index += array->size;
294 return -EAGAIN; 333 return -EAGAIN;
295 } 334 }
296 335
297 index = (unsigned int)diff; 336 index = (unsigned int)diff;
298 *desc->dir_cookie = array->array[index].cookie; 337 *desc->dir_cookie = array->array[index].cookie;
299 desc->cache_entry_index = index; 338 desc->cache_entry_index = index;
339 ctx->duped = 0;
300 return 0; 340 return 0;
301out_eof: 341out_eof:
302 desc->eof = 1; 342 desc->eof = 1;
@@ -307,10 +347,18 @@ static
307int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) 347int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
308{ 348{
309 int i; 349 int i;
350 loff_t new_pos;
310 int status = -EAGAIN; 351 int status = -EAGAIN;
352 struct nfs_open_dir_context *ctx = desc->file->private_data;
311 353
312 for (i = 0; i < array->size; i++) { 354 for (i = 0; i < array->size; i++) {
313 if (array->array[i].cookie == *desc->dir_cookie) { 355 if (array->array[i].cookie == *desc->dir_cookie) {
356 new_pos = desc->current_index + i;
357 if (new_pos < desc->file->f_pos) {
358 ctx->dup_cookie = *desc->dir_cookie;
359 ctx->duped = 1;
360 }
361 desc->file->f_pos = new_pos;
314 desc->cache_entry_index = i; 362 desc->cache_entry_index = i;
315 return 0; 363 return 0;
316 } 364 }
@@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
342 390
343 if (status == -EAGAIN) { 391 if (status == -EAGAIN) {
344 desc->last_cookie = array->last_cookie; 392 desc->last_cookie = array->last_cookie;
393 desc->current_index += array->size;
345 desc->page_index++; 394 desc->page_index++;
346 } 395 }
347 nfs_readdir_release_array(desc->page); 396 nfs_readdir_release_array(desc->page);
@@ -354,7 +403,8 @@ static
354int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, 403int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
355 struct nfs_entry *entry, struct file *file, struct inode *inode) 404 struct nfs_entry *entry, struct file *file, struct inode *inode)
356{ 405{
357 struct rpc_cred *cred = nfs_file_cred(file); 406 struct nfs_open_dir_context *ctx = file->private_data;
407 struct rpc_cred *cred = ctx->cred;
358 unsigned long timestamp, gencount; 408 unsigned long timestamp, gencount;
359 int error; 409 int error;
360 410
@@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
693 int i = 0; 743 int i = 0;
694 int res = 0; 744 int res = 0;
695 struct nfs_cache_array *array = NULL; 745 struct nfs_cache_array *array = NULL;
746 struct nfs_open_dir_context *ctx = file->private_data;
747
748 if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) {
749 if (printk_ratelimit()) {
750 pr_notice("NFS: directory %s/%s contains a readdir loop. "
751 "Please contact your server vendor. "
752 "Offending cookie: %llu\n",
753 file->f_dentry->d_parent->d_name.name,
754 file->f_dentry->d_name.name,
755 *desc->dir_cookie);
756 }
757 res = -ELOOP;
758 goto out;
759 }
696 760
697 array = nfs_readdir_get_array(desc->page); 761 array = nfs_readdir_get_array(desc->page);
698 if (IS_ERR(array)) { 762 if (IS_ERR(array)) {
@@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
785 struct inode *inode = dentry->d_inode; 849 struct inode *inode = dentry->d_inode;
786 nfs_readdir_descriptor_t my_desc, 850 nfs_readdir_descriptor_t my_desc,
787 *desc = &my_desc; 851 *desc = &my_desc;
852 struct nfs_open_dir_context *dir_ctx = filp->private_data;
788 int res; 853 int res;
789 854
790 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 855 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
801 memset(desc, 0, sizeof(*desc)); 866 memset(desc, 0, sizeof(*desc));
802 867
803 desc->file = filp; 868 desc->file = filp;
804 desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; 869 desc->dir_cookie = &dir_ctx->dir_cookie;
805 desc->decode = NFS_PROTO(inode)->decode_dirent; 870 desc->decode = NFS_PROTO(inode)->decode_dirent;
806 desc->plus = NFS_USE_READDIRPLUS(inode); 871 desc->plus = NFS_USE_READDIRPLUS(inode);
807 872
@@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
853{ 918{
854 struct dentry *dentry = filp->f_path.dentry; 919 struct dentry *dentry = filp->f_path.dentry;
855 struct inode *inode = dentry->d_inode; 920 struct inode *inode = dentry->d_inode;
921 struct nfs_open_dir_context *dir_ctx = filp->private_data;
856 922
857 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", 923 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
858 dentry->d_parent->d_name.name, 924 dentry->d_parent->d_name.name,
@@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
872 } 938 }
873 if (offset != filp->f_pos) { 939 if (offset != filp->f_pos) {
874 filp->f_pos = offset; 940 filp->f_pos = offset;
875 nfs_file_open_context(filp)->dir_cookie = 0; 941 dir_ctx->dir_cookie = 0;
942 dir_ctx->duped = 0;
876 } 943 }
877out: 944out:
878 mutex_unlock(&inode->i_mutex); 945 mutex_unlock(&inode->i_mutex);
@@ -1068,7 +1135,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1068 if (fhandle == NULL || fattr == NULL) 1135 if (fhandle == NULL || fattr == NULL)
1069 goto out_error; 1136 goto out_error;
1070 1137
1071 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1138 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1072 if (error) 1139 if (error)
1073 goto out_bad; 1140 goto out_bad;
1074 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1141 if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1224,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1224 parent = dentry->d_parent; 1291 parent = dentry->d_parent;
1225 /* Protect against concurrent sillydeletes */ 1292 /* Protect against concurrent sillydeletes */
1226 nfs_block_sillyrename(parent); 1293 nfs_block_sillyrename(parent);
1227 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1294 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1228 if (error == -ENOENT) 1295 if (error == -ENOENT)
1229 goto no_entry; 1296 goto no_entry;
1230 if (error < 0) { 1297 if (error < 0) {
@@ -1562,7 +1629,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1562 if (dentry->d_inode) 1629 if (dentry->d_inode)
1563 goto out; 1630 goto out;
1564 if (fhandle->size == 0) { 1631 if (fhandle->size == 0) {
1565 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1632 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1566 if (error) 1633 if (error)
1567 goto out_error; 1634 goto out_error;
1568 } 1635 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d85a534b15cd..3ac5bd695e5e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync)
326 ret = xchg(&ctx->error, 0); 326 ret = xchg(&ctx->error, 0);
327 if (!ret && status < 0) 327 if (!ret && status < 0)
328 ret = status; 328 ret = status;
329 if (!ret && !datasync)
330 /* application has asked for meta-data sync */
331 ret = pnfs_layoutcommit_inode(inode, true);
329 return ret; 332 return ret;
330} 333}
331 334
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 1084792bc0fe..dcb61548887f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -222,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
222 goto out; 222 goto out;
223 } 223 }
224 224
225 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
226 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
227 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
228
225 inode = nfs_fhget(sb, mntfh, fattr); 229 inode = nfs_fhget(sb, mntfh, fattr);
226 if (IS_ERR(inode)) { 230 if (IS_ERR(inode)) {
227 dprintk("nfs_get_root: get root inode failed\n"); 231 dprintk("nfs_get_root: get root inode failed\n");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 01768e5e2c9b..57bb31ad7a5e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -254,7 +254,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
254 struct inode *inode = ERR_PTR(-ENOENT); 254 struct inode *inode = ERR_PTR(-ENOENT);
255 unsigned long hash; 255 unsigned long hash;
256 256
257 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) 257 nfs_attr_check_mountpoint(sb, fattr);
258
259 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0)
258 goto out_no_inode; 260 goto out_no_inode;
259 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) 261 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
260 goto out_no_inode; 262 goto out_no_inode;
@@ -298,8 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
298 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) 300 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
299 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 301 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
300 /* Deal with crossing mountpoints */ 302 /* Deal with crossing mountpoints */
301 if ((fattr->valid & NFS_ATTR_FATTR_FSID) 303 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
302 && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { 304 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
303 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 305 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
304 inode->i_op = &nfs_referral_inode_operations; 306 inode->i_op = &nfs_referral_inode_operations;
305 else 307 else
@@ -639,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr
639 ctx->mode = f_mode; 641 ctx->mode = f_mode;
640 ctx->flags = 0; 642 ctx->flags = 0;
641 ctx->error = 0; 643 ctx->error = 0;
642 ctx->dir_cookie = 0;
643 nfs_init_lock_context(&ctx->lock_context); 644 nfs_init_lock_context(&ctx->lock_context);
644 ctx->lock_context.open_context = ctx; 645 ctx->lock_context.open_context = ctx;
645 INIT_LIST_HEAD(&ctx->list); 646 INIT_LIST_HEAD(&ctx->list);
@@ -1471,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1471 nfsi->delegation_state = 0; 1472 nfsi->delegation_state = 0;
1472 init_rwsem(&nfsi->rwsem); 1473 init_rwsem(&nfsi->rwsem);
1473 nfsi->layout = NULL; 1474 nfsi->layout = NULL;
1475 atomic_set(&nfsi->commits_outstanding, 0);
1474#endif 1476#endif
1475} 1477}
1476 1478
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 72e0bddf7a2f..ce118ce885dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -39,6 +39,12 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
39 return 0; 39 return 0;
40} 40}
41 41
42static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
43{
44 if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
45 fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
46}
47
42struct nfs_clone_mount { 48struct nfs_clone_mount {
43 const struct super_block *sb; 49 const struct super_block *sb;
44 const struct dentry *dentry; 50 const struct dentry *dentry;
@@ -214,6 +220,7 @@ extern const u32 nfs41_maxwrite_overhead;
214/* nfs4proc.c */ 220/* nfs4proc.c */
215#ifdef CONFIG_NFS_V4 221#ifdef CONFIG_NFS_V4
216extern struct rpc_procinfo nfs4_procedures[]; 222extern struct rpc_procinfo nfs4_procedures[];
223void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
217#endif 224#endif
218 225
219extern int nfs4_init_ds_session(struct nfs_client *clp); 226extern int nfs4_init_ds_session(struct nfs_client *clp);
@@ -276,11 +283,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
276extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
277 284
278/* write.c */ 285/* write.c */
286extern void nfs_commit_free(struct nfs_write_data *p);
279extern int nfs_initiate_write(struct nfs_write_data *data, 287extern int nfs_initiate_write(struct nfs_write_data *data,
280 struct rpc_clnt *clnt, 288 struct rpc_clnt *clnt,
281 const struct rpc_call_ops *call_ops, 289 const struct rpc_call_ops *call_ops,
282 int how); 290 int how);
283extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 291extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
292extern int nfs_initiate_commit(struct nfs_write_data *data,
293 struct rpc_clnt *clnt,
294 const struct rpc_call_ops *call_ops,
295 int how);
296extern void nfs_init_commit(struct nfs_write_data *data,
297 struct list_head *head,
298 struct pnfs_layout_segment *lseg);
299void nfs_retry_commit(struct list_head *page_list,
300 struct pnfs_layout_segment *lseg);
301void nfs_commit_clear_lock(struct nfs_inode *nfsi);
302void nfs_commitdata_release(void *data);
303void nfs_commit_release_pages(struct nfs_write_data *data);
304
284#ifdef CONFIG_MIGRATION 305#ifdef CONFIG_MIGRATION
285extern int nfs_migrate_page(struct address_space *, 306extern int nfs_migrate_page(struct address_space *,
286 struct page *, struct page *); 307 struct page *, struct page *);
@@ -296,12 +317,14 @@ extern int nfs4_init_client(struct nfs_client *clp,
296 rpc_authflavor_t authflavour, 317 rpc_authflavor_t authflavour,
297 int noresvport); 318 int noresvport);
298extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); 319extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
299extern int _nfs4_call_sync(struct nfs_server *server, 320extern int _nfs4_call_sync(struct rpc_clnt *clnt,
321 struct nfs_server *server,
300 struct rpc_message *msg, 322 struct rpc_message *msg,
301 struct nfs4_sequence_args *args, 323 struct nfs4_sequence_args *args,
302 struct nfs4_sequence_res *res, 324 struct nfs4_sequence_res *res,
303 int cache_reply); 325 int cache_reply);
304extern int _nfs4_call_sync_session(struct nfs_server *server, 326extern int _nfs4_call_sync_session(struct rpc_clnt *clnt,
327 struct nfs_server *server,
305 struct rpc_message *msg, 328 struct rpc_message *msg,
306 struct nfs4_sequence_args *args, 329 struct nfs4_sequence_args *args,
307 struct nfs4_sequence_res *res, 330 struct nfs4_sequence_res *res,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index c0b8344db0c6..9166fcb66da2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -15,6 +15,7 @@
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
17#include <linux/vfs.h> 17#include <linux/vfs.h>
18#include <linux/sunrpc/gss_api.h>
18#include "internal.h" 19#include "internal.h"
19 20
20#define NFSDBG_FACILITY NFSDBG_VFS 21#define NFSDBG_FACILITY NFSDBG_VFS
@@ -27,7 +28,8 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 28
28static struct vfsmount *nfs_do_submount(struct dentry *dentry, 29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
29 struct nfs_fh *fh, 30 struct nfs_fh *fh,
30 struct nfs_fattr *fattr); 31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
31 33
32/* 34/*
33 * nfs_path - reconstruct the path given an arbitrary dentry 35 * nfs_path - reconstruct the path given an arbitrary dentry
@@ -98,7 +100,7 @@ rename_retry:
98 namelen--; 100 namelen--;
99 buflen -= namelen; 101 buflen -= namelen;
100 if (buflen < 0) { 102 if (buflen < 0) {
101 spin_lock(&dentry->d_lock); 103 spin_unlock(&dentry->d_lock);
102 rcu_read_unlock(); 104 rcu_read_unlock();
103 goto Elong; 105 goto Elong;
104 } 106 }
@@ -108,7 +110,7 @@ rename_retry:
108 rcu_read_unlock(); 110 rcu_read_unlock();
109 return end; 111 return end;
110Elong_unlock: 112Elong_unlock:
111 spin_lock(&dentry->d_lock); 113 spin_unlock(&dentry->d_lock);
112 rcu_read_unlock(); 114 rcu_read_unlock();
113 if (read_seqretry(&rename_lock, seq)) 115 if (read_seqretry(&rename_lock, seq))
114 goto rename_retry; 116 goto rename_retry;
@@ -116,6 +118,102 @@ Elong:
116 return ERR_PTR(-ENAMETOOLONG); 118 return ERR_PTR(-ENAMETOOLONG);
117} 119}
118 120
121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry)
152{
153 int status = 0;
154 struct page *page;
155 struct nfs4_secinfo_flavors *flavors;
156 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
157 rpc_authflavor_t flavor = RPC_AUTH_UNIX;
158
159 secinfo = NFS_PROTO(parent->d_inode)->secinfo;
160 if (secinfo != NULL) {
161 page = alloc_page(GFP_KERNEL);
162 if (!page) {
163 status = -ENOMEM;
164 goto out;
165 }
166 flavors = page_address(page);
167 status = secinfo(parent->d_inode, &dentry->d_name, flavors);
168 flavor = nfs_find_best_sec(flavors, dentry->d_inode);
169 put_page(page);
170 }
171
172 return flavor;
173
174out:
175 status = -ENOMEM;
176 return status;
177}
178
179static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
180 struct dentry *dentry, struct path *path,
181 struct nfs_fh *fh, struct nfs_fattr *fattr)
182{
183 rpc_authflavor_t flavor;
184 struct rpc_clnt *clone;
185 struct rpc_auth *auth;
186 int err;
187
188 flavor = nfs_negotiate_security(parent, path->dentry);
189 if (flavor < 0)
190 goto out;
191 clone = rpc_clone_client(server->client);
192 auth = rpcauth_create(flavor, clone);
193 if (!auth) {
194 flavor = -EIO;
195 goto out_shutdown;
196 }
197 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
198 &path->dentry->d_name,
199 fh, fattr);
200 if (err < 0)
201 flavor = err;
202out_shutdown:
203 rpc_shutdown_client(clone);
204out:
205 return flavor;
206}
207#else /* CONFIG_NFS_V4 */
208static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server,
209 struct dentry *parent, struct dentry *dentry,
210 struct path *path, struct nfs_fh *fh,
211 struct nfs_fattr *fattr)
212{
213 return -EPERM;
214}
215#endif /* CONFIG_NFS_V4 */
216
119/* 217/*
120 * nfs_d_automount - Handle crossing a mountpoint on the server 218 * nfs_d_automount - Handle crossing a mountpoint on the server
121 * @path - The mountpoint 219 * @path - The mountpoint
@@ -136,6 +234,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
136 struct nfs_fh *fh = NULL; 234 struct nfs_fh *fh = NULL;
137 struct nfs_fattr *fattr = NULL; 235 struct nfs_fattr *fattr = NULL;
138 int err; 236 int err;
237 rpc_authflavor_t flavor = 1;
139 238
140 dprintk("--> nfs_d_automount()\n"); 239 dprintk("--> nfs_d_automount()\n");
141 240
@@ -153,9 +252,16 @@ struct vfsmount *nfs_d_automount(struct path *path)
153 252
154 /* Look it up again to get its attributes */ 253 /* Look it up again to get its attributes */
155 parent = dget_parent(path->dentry); 254 parent = dget_parent(path->dentry);
156 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, 255 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
157 &path->dentry->d_name, 256 &path->dentry->d_name,
158 fh, fattr); 257 fh, fattr);
258 if (err == -EPERM) {
259 flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr);
260 if (flavor < 0)
261 err = flavor;
262 else
263 err = 0;
264 }
159 dput(parent); 265 dput(parent);
160 if (err != 0) { 266 if (err != 0) {
161 mnt = ERR_PTR(err); 267 mnt = ERR_PTR(err);
@@ -165,7 +271,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
165 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 271 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
166 mnt = nfs_do_refmount(path->dentry); 272 mnt = nfs_do_refmount(path->dentry);
167 else 273 else
168 mnt = nfs_do_submount(path->dentry, fh, fattr); 274 mnt = nfs_do_submount(path->dentry, fh, fattr, flavor);
169 if (IS_ERR(mnt)) 275 if (IS_ERR(mnt))
170 goto out; 276 goto out;
171 277
@@ -232,17 +338,20 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
232 * @dentry - parent directory 338 * @dentry - parent directory
233 * @fh - filehandle for new root dentry 339 * @fh - filehandle for new root dentry
234 * @fattr - attributes for new root inode 340 * @fattr - attributes for new root inode
341 * @authflavor - security flavor to use when performing the mount
235 * 342 *
236 */ 343 */
237static struct vfsmount *nfs_do_submount(struct dentry *dentry, 344static struct vfsmount *nfs_do_submount(struct dentry *dentry,
238 struct nfs_fh *fh, 345 struct nfs_fh *fh,
239 struct nfs_fattr *fattr) 346 struct nfs_fattr *fattr,
347 rpc_authflavor_t authflavor)
240{ 348{
241 struct nfs_clone_mount mountdata = { 349 struct nfs_clone_mount mountdata = {
242 .sb = dentry->d_sb, 350 .sb = dentry->d_sb,
243 .dentry = dentry, 351 .dentry = dentry,
244 .fh = fh, 352 .fh = fh,
245 .fattr = fattr, 353 .fattr = fattr,
354 .authflavor = authflavor,
246 }; 355 };
247 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 356 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
248 char *page = (char *) __get_free_page(GFP_USER); 357 char *page = (char *) __get_free_page(GFP_USER);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d0c80d8b3f96..38053d823eb0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -141,7 +141,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
141} 141}
142 142
143static int 143static int
144nfs3_proc_lookup(struct inode *dir, struct qstr *name, 144nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
145 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 145 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
146{ 146{
147 struct nfs3_diropargs arg = { 147 struct nfs3_diropargs arg = {
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c64be1cff080..e1c261ddd65d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -57,7 +57,8 @@ enum nfs4_session_state {
57struct nfs4_minor_version_ops { 57struct nfs4_minor_version_ops {
58 u32 minor_version; 58 u32 minor_version;
59 59
60 int (*call_sync)(struct nfs_server *server, 60 int (*call_sync)(struct rpc_clnt *clnt,
61 struct nfs_server *server,
61 struct rpc_message *msg, 62 struct rpc_message *msg,
62 struct nfs4_sequence_args *args, 63 struct nfs4_sequence_args *args,
63 struct nfs4_sequence_res *res, 64 struct nfs4_sequence_res *res,
@@ -262,6 +263,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
262extern int nfs4_init_session(struct nfs_server *server); 263extern int nfs4_init_session(struct nfs_server *server);
263extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 264extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
264 struct nfs_fsinfo *fsinfo); 265 struct nfs_fsinfo *fsinfo);
266extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
267 bool sync);
265 268
266static inline bool 269static inline bool
267is_ds_only_client(struct nfs_client *clp) 270is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 428558464817..6f8192f4cfc7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -154,6 +154,23 @@ static int filelayout_read_done_cb(struct rpc_task *task,
154} 154}
155 155
156/* 156/*
157 * We reference the rpc_cred of the first WRITE that triggers the need for
158 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
159 * rfc5661 is not clear about which credential should be used.
160 */
161static void
162filelayout_set_layoutcommit(struct nfs_write_data *wdata)
163{
164 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
165 wdata->res.verf->committed == NFS_FILE_SYNC)
166 return;
167
168 pnfs_set_layoutcommit(wdata);
169 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
170 (unsigned long) wdata->lseg->pls_end_pos);
171}
172
173/*
157 * Call ops for the async read/write cases 174 * Call ops for the async read/write cases
158 * In the case of dense layouts, the offset needs to be reset to its 175 * In the case of dense layouts, the offset needs to be reset to its
159 * original value. 176 * original value.
@@ -210,6 +227,38 @@ static int filelayout_write_done_cb(struct rpc_task *task,
210 return -EAGAIN; 227 return -EAGAIN;
211 } 228 }
212 229
230 filelayout_set_layoutcommit(data);
231 return 0;
232}
233
234/* Fake up some data that will cause nfs_commit_release to retry the writes. */
235static void prepare_to_resend_writes(struct nfs_write_data *data)
236{
237 struct nfs_page *first = nfs_list_entry(data->pages.next);
238
239 data->task.tk_status = 0;
240 memcpy(data->verf.verifier, first->wb_verf.verifier,
241 sizeof(first->wb_verf.verifier));
242 data->verf.verifier[0]++; /* ensure verifier mismatch */
243}
244
245static int filelayout_commit_done_cb(struct rpc_task *task,
246 struct nfs_write_data *data)
247{
248 int reset = 0;
249
250 if (filelayout_async_handle_error(task, data->args.context->state,
251 data->ds_clp, &reset) == -EAGAIN) {
252 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
253 __func__, data->ds_clp, data->ds_clp->cl_session);
254 if (reset) {
255 prepare_to_resend_writes(data);
256 filelayout_set_lo_fail(data->lseg);
257 } else
258 nfs_restart_rpc(task, data->ds_clp);
259 return -EAGAIN;
260 }
261
213 return 0; 262 return 0;
214} 263}
215 264
@@ -240,6 +289,16 @@ static void filelayout_write_release(void *data)
240 wdata->mds_ops->rpc_release(data); 289 wdata->mds_ops->rpc_release(data);
241} 290}
242 291
292static void filelayout_commit_release(void *data)
293{
294 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
295
296 nfs_commit_release_pages(wdata);
297 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
298 nfs_commit_clear_lock(NFS_I(wdata->inode));
299 nfs_commitdata_release(wdata);
300}
301
243struct rpc_call_ops filelayout_read_call_ops = { 302struct rpc_call_ops filelayout_read_call_ops = {
244 .rpc_call_prepare = filelayout_read_prepare, 303 .rpc_call_prepare = filelayout_read_prepare,
245 .rpc_call_done = filelayout_read_call_done, 304 .rpc_call_done = filelayout_read_call_done,
@@ -252,6 +311,12 @@ struct rpc_call_ops filelayout_write_call_ops = {
252 .rpc_release = filelayout_write_release, 311 .rpc_release = filelayout_write_release,
253}; 312};
254 313
314struct rpc_call_ops filelayout_commit_call_ops = {
315 .rpc_call_prepare = filelayout_write_prepare,
316 .rpc_call_done = filelayout_write_call_done,
317 .rpc_release = filelayout_commit_release,
318};
319
255static enum pnfs_try_status 320static enum pnfs_try_status
256filelayout_read_pagelist(struct nfs_read_data *data) 321filelayout_read_pagelist(struct nfs_read_data *data)
257{ 322{
@@ -320,10 +385,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
320 data->inode->i_ino, sync, (size_t) data->args.count, offset, 385 data->inode->i_ino, sync, (size_t) data->args.count, offset,
321 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 386 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
322 387
323 /* We can't handle commit to ds yet */
324 if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
325 data->args.stable = NFS_FILE_SYNC;
326
327 data->write_done_cb = filelayout_write_done_cb; 388 data->write_done_cb = filelayout_write_done_cb;
328 data->ds_clp = ds->ds_clp; 389 data->ds_clp = ds->ds_clp;
329 fh = nfs4_fl_select_ds_fh(lseg, j); 390 fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -441,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
441 struct nfs4_layoutget_res *lgr, 502 struct nfs4_layoutget_res *lgr,
442 struct nfs4_deviceid *id) 503 struct nfs4_deviceid *id)
443{ 504{
444 uint32_t *p = (uint32_t *)lgr->layout.buf; 505 struct xdr_stream stream;
506 struct xdr_buf buf = {
507 .pages = lgr->layoutp->pages,
508 .page_len = lgr->layoutp->len,
509 .buflen = lgr->layoutp->len,
510 .len = lgr->layoutp->len,
511 };
512 struct page *scratch;
513 __be32 *p;
445 uint32_t nfl_util; 514 uint32_t nfl_util;
446 int i; 515 int i;
447 516
448 dprintk("%s: set_layout_map Begin\n", __func__); 517 dprintk("%s: set_layout_map Begin\n", __func__);
449 518
519 scratch = alloc_page(GFP_KERNEL);
520 if (!scratch)
521 return -ENOMEM;
522
523 xdr_init_decode(&stream, &buf, NULL);
524 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
525
526 /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
527 * num_fh (4) */
528 p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
529 if (unlikely(!p))
530 goto out_err;
531
450 memcpy(id, p, sizeof(*id)); 532 memcpy(id, p, sizeof(*id));
451 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); 533 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
452 print_deviceid(id); 534 print_deviceid(id);
@@ -468,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
468 __func__, nfl_util, fl->num_fh, fl->first_stripe_index, 550 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
469 fl->pattern_offset); 551 fl->pattern_offset);
470 552
553 if (!fl->num_fh)
554 goto out_err;
555
471 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 556 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
472 GFP_KERNEL); 557 GFP_KERNEL);
473 if (!fl->fh_array) 558 if (!fl->fh_array)
474 return -ENOMEM; 559 goto out_err;
475 560
476 for (i = 0; i < fl->num_fh; i++) { 561 for (i = 0; i < fl->num_fh; i++) {
477 /* Do we want to use a mempool here? */ 562 /* Do we want to use a mempool here? */
478 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 563 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
479 if (!fl->fh_array[i]) { 564 if (!fl->fh_array[i])
480 filelayout_free_fh_array(fl); 565 goto out_err_free;
481 return -ENOMEM; 566
482 } 567 p = xdr_inline_decode(&stream, 4);
568 if (unlikely(!p))
569 goto out_err_free;
483 fl->fh_array[i]->size = be32_to_cpup(p++); 570 fl->fh_array[i]->size = be32_to_cpup(p++);
484 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { 571 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
485 printk(KERN_ERR "Too big fh %d received %d\n", 572 printk(KERN_ERR "Too big fh %d received %d\n",
486 i, fl->fh_array[i]->size); 573 i, fl->fh_array[i]->size);
487 filelayout_free_fh_array(fl); 574 goto out_err_free;
488 return -EIO;
489 } 575 }
576
577 p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
578 if (unlikely(!p))
579 goto out_err_free;
490 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); 580 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
491 p += XDR_QUADLEN(fl->fh_array[i]->size);
492 dprintk("DEBUG: %s: fh len %d\n", __func__, 581 dprintk("DEBUG: %s: fh len %d\n", __func__,
493 fl->fh_array[i]->size); 582 fl->fh_array[i]->size);
494 } 583 }
495 584
585 __free_page(scratch);
496 return 0; 586 return 0;
587
588out_err_free:
589 filelayout_free_fh_array(fl);
590out_err:
591 __free_page(scratch);
592 return -EIO;
593}
594
595static void
596filelayout_free_lseg(struct pnfs_layout_segment *lseg)
597{
598 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
599
600 dprintk("--> %s\n", __func__);
601 nfs4_fl_put_deviceid(fl->dsaddr);
602 kfree(fl->commit_buckets);
603 _filelayout_free_lseg(fl);
497} 604}
498 605
499static struct pnfs_layout_segment * 606static struct pnfs_layout_segment *
@@ -514,17 +621,28 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
514 _filelayout_free_lseg(fl); 621 _filelayout_free_lseg(fl);
515 return NULL; 622 return NULL;
516 } 623 }
517 return &fl->generic_hdr;
518}
519 624
520static void 625 /* This assumes there is only one IOMODE_RW lseg. What
521filelayout_free_lseg(struct pnfs_layout_segment *lseg) 626 * we really want to do is have a layout_hdr level
522{ 627 * dictionary of <multipath_list4, fh> keys, each
523 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 628 * associated with a struct list_head, populated by calls
524 629 * to filelayout_write_pagelist().
525 dprintk("--> %s\n", __func__); 630 * */
526 nfs4_fl_put_deviceid(fl->dsaddr); 631 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
527 _filelayout_free_lseg(fl); 632 int i;
633 int size = (fl->stripe_type == STRIPE_SPARSE) ?
634 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
635
636 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
637 if (!fl->commit_buckets) {
638 filelayout_free_lseg(&fl->generic_hdr);
639 return NULL;
640 }
641 fl->number_of_buckets = size;
642 for (i = 0; i < size; i++)
643 INIT_LIST_HEAD(&fl->commit_buckets[i]);
644 }
645 return &fl->generic_hdr;
528} 646}
529 647
530/* 648/*
@@ -552,6 +670,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
552 return (p_stripe == r_stripe); 670 return (p_stripe == r_stripe);
553} 671}
554 672
673static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
674{
675 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
676}
677
678static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
679{
680 if (fl->stripe_type == STRIPE_SPARSE)
681 return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
682 else
683 return j;
684}
685
686struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
687{
688 struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
689 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
690 u32 i, j;
691 struct list_head *list;
692
693 /* Note that we are calling nfs4_fl_calc_j_index on each page
694 * that ends up being committed to a data server. An attractive
695 * alternative is to add a field to nfs_write_data and nfs_page
696 * to store the value calculated in filelayout_write_pagelist
697 * and just use that here.
698 */
699 j = nfs4_fl_calc_j_index(lseg,
700 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
701 i = select_bucket_index(fl, j);
702 list = &fl->commit_buckets[i];
703 if (list_empty(list)) {
704 /* Non-empty buckets hold a reference on the lseg */
705 get_lseg(lseg);
706 }
707 return list;
708}
709
710static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
711{
712 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
713
714 if (flseg->stripe_type == STRIPE_SPARSE)
715 return i;
716 else
717 return nfs4_fl_calc_ds_index(lseg, i);
718}
719
720static struct nfs_fh *
721select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
722{
723 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
724
725 if (flseg->stripe_type == STRIPE_SPARSE) {
726 if (flseg->num_fh == 1)
727 i = 0;
728 else if (flseg->num_fh == 0)
729 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
730 return NULL;
731 }
732 return flseg->fh_array[i];
733}
734
735static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
736{
737 struct pnfs_layout_segment *lseg = data->lseg;
738 struct nfs4_pnfs_ds *ds;
739 u32 idx;
740 struct nfs_fh *fh;
741
742 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
743 ds = nfs4_fl_prepare_ds(lseg, idx);
744 if (!ds) {
745 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
746 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
747 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
748 prepare_to_resend_writes(data);
749 data->mds_ops->rpc_release(data);
750 return -EAGAIN;
751 }
752 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
753 data->write_done_cb = filelayout_commit_done_cb;
754 data->ds_clp = ds->ds_clp;
755 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
756 if (fh)
757 data->args.fh = fh;
758 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
759 &filelayout_commit_call_ops, how);
760}
761
762/*
763 * This is only useful while we are using whole file layouts.
764 */
765static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
766{
767 struct pnfs_layout_segment *lseg, *rv = NULL;
768
769 spin_lock(&inode->i_lock);
770 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
771 if (lseg->pls_range.iomode == IOMODE_RW)
772 rv = get_lseg(lseg);
773 spin_unlock(&inode->i_lock);
774 return rv;
775}
776
777static int alloc_ds_commits(struct inode *inode, struct list_head *list)
778{
779 struct pnfs_layout_segment *lseg;
780 struct nfs4_filelayout_segment *fl;
781 struct nfs_write_data *data;
782 int i, j;
783
784 /* Won't need this when non-whole file layout segments are supported
785 * instead we will use a pnfs_layout_hdr structure */
786 lseg = find_only_write_lseg(inode);
787 if (!lseg)
788 return 0;
789 fl = FILELAYOUT_LSEG(lseg);
790 for (i = 0; i < fl->number_of_buckets; i++) {
791 if (list_empty(&fl->commit_buckets[i]))
792 continue;
793 data = nfs_commitdata_alloc();
794 if (!data)
795 goto out_bad;
796 data->ds_commit_index = i;
797 data->lseg = lseg;
798 list_add(&data->pages, list);
799 }
800 put_lseg(lseg);
801 return 0;
802
803out_bad:
804 for (j = i; j < fl->number_of_buckets; j++) {
805 if (list_empty(&fl->commit_buckets[i]))
806 continue;
807 nfs_retry_commit(&fl->commit_buckets[i], lseg);
808 put_lseg(lseg); /* associated with emptying bucket */
809 }
810 put_lseg(lseg);
811 /* Caller will clean up entries put on list */
812 return -ENOMEM;
813}
814
815/* This follows nfs_commit_list pretty closely */
816static int
817filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
818 int how)
819{
820 struct nfs_write_data *data, *tmp;
821 LIST_HEAD(list);
822
823 if (!list_empty(mds_pages)) {
824 data = nfs_commitdata_alloc();
825 if (!data)
826 goto out_bad;
827 data->lseg = NULL;
828 list_add(&data->pages, &list);
829 }
830
831 if (alloc_ds_commits(inode, &list))
832 goto out_bad;
833
834 list_for_each_entry_safe(data, tmp, &list, pages) {
835 list_del_init(&data->pages);
836 atomic_inc(&NFS_I(inode)->commits_outstanding);
837 if (!data->lseg) {
838 nfs_init_commit(data, mds_pages, NULL);
839 nfs_initiate_commit(data, NFS_CLIENT(inode),
840 data->mds_ops, how);
841 } else {
842 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
843 filelayout_initiate_commit(data, how);
844 }
845 }
846 return 0;
847 out_bad:
848 list_for_each_entry_safe(data, tmp, &list, pages) {
849 nfs_retry_commit(&data->pages, data->lseg);
850 list_del_init(&data->pages);
851 nfs_commit_free(data);
852 }
853 nfs_retry_commit(mds_pages, NULL);
854 nfs_commit_clear_lock(NFS_I(inode));
855 return -ENOMEM;
856}
857
555static struct pnfs_layoutdriver_type filelayout_type = { 858static struct pnfs_layoutdriver_type filelayout_type = {
556 .id = LAYOUT_NFSV4_1_FILES, 859 .id = LAYOUT_NFSV4_1_FILES,
557 .name = "LAYOUT_NFSV4_1_FILES", 860 .name = "LAYOUT_NFSV4_1_FILES",
@@ -559,6 +862,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
559 .alloc_lseg = filelayout_alloc_lseg, 862 .alloc_lseg = filelayout_alloc_lseg,
560 .free_lseg = filelayout_free_lseg, 863 .free_lseg = filelayout_free_lseg,
561 .pg_test = filelayout_pg_test, 864 .pg_test = filelayout_pg_test,
865 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
866 .choose_commit_list = filelayout_choose_commit_list,
867 .commit_pagelist = filelayout_commit_pagelist,
562 .read_pagelist = filelayout_read_pagelist, 868 .read_pagelist = filelayout_read_pagelist,
563 .write_pagelist = filelayout_write_pagelist, 869 .write_pagelist = filelayout_write_pagelist,
564}; 870};
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index ee0c907742b5..085a354e0f08 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -79,6 +79,8 @@ struct nfs4_filelayout_segment {
79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
80 unsigned int num_fh; 80 unsigned int num_fh;
81 struct nfs_fh **fh_array; 81 struct nfs_fh **fh_array;
82 struct list_head *commit_buckets; /* Sort commits to ds */
83 int number_of_buckets;
82}; 84};
83 85
84static inline struct nfs4_filelayout_segment * 86static inline struct nfs4_filelayout_segment *
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 68143c162e3b..de5350f2b249 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -261,7 +261,7 @@ out:
261 * Currently only support ipv4, and one multi-path address. 261 * Currently only support ipv4, and one multi-path address.
262 */ 262 */
263static struct nfs4_pnfs_ds * 263static struct nfs4_pnfs_ds *
264decode_and_add_ds(__be32 **pp, struct inode *inode) 264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
265{ 265{
266 struct nfs4_pnfs_ds *ds = NULL; 266 struct nfs4_pnfs_ds *ds = NULL;
267 char *buf; 267 char *buf;
@@ -269,25 +269,34 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
269 u32 ip_addr, port; 269 u32 ip_addr, port;
270 int nlen, rlen, i; 270 int nlen, rlen, i;
271 int tmp[2]; 271 int tmp[2];
272 __be32 *r_netid, *r_addr, *p = *pp; 272 __be32 *p;
273 273
274 /* r_netid */ 274 /* r_netid */
275 p = xdr_inline_decode(streamp, 4);
276 if (unlikely(!p))
277 goto out_err;
275 nlen = be32_to_cpup(p++); 278 nlen = be32_to_cpup(p++);
276 r_netid = p;
277 p += XDR_QUADLEN(nlen);
278 279
279 /* r_addr */ 280 p = xdr_inline_decode(streamp, nlen);
280 rlen = be32_to_cpup(p++); 281 if (unlikely(!p))
281 r_addr = p; 282 goto out_err;
282 p += XDR_QUADLEN(rlen);
283 *pp = p;
284 283
285 /* Check that netid is "tcp" */ 284 /* Check that netid is "tcp" */
286 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { 285 if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
287 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); 286 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
288 goto out_err; 287 goto out_err;
289 } 288 }
290 289
290 /* r_addr */
291 p = xdr_inline_decode(streamp, 4);
292 if (unlikely(!p))
293 goto out_err;
294 rlen = be32_to_cpup(p);
295
296 p = xdr_inline_decode(streamp, rlen);
297 if (unlikely(!p))
298 goto out_err;
299
291 /* ipv6 length plus port is legal */ 300 /* ipv6 length plus port is legal */
292 if (rlen > INET6_ADDRSTRLEN + 8) { 301 if (rlen > INET6_ADDRSTRLEN + 8) {
293 dprintk("%s: Invalid address, length %d\n", __func__, 302 dprintk("%s: Invalid address, length %d\n", __func__,
@@ -300,7 +309,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
300 goto out_err; 309 goto out_err;
301 } 310 }
302 buf[rlen] = '\0'; 311 buf[rlen] = '\0';
303 memcpy(buf, r_addr, rlen); 312 memcpy(buf, p, rlen);
304 313
305 /* replace the port dots with dashes for the in4_pton() delimiter*/ 314 /* replace the port dots with dashes for the in4_pton() delimiter*/
306 for (i = 0; i < 2; i++) { 315 for (i = 0; i < 2; i++) {
@@ -336,90 +345,154 @@ out_err:
336static struct nfs4_file_layout_dsaddr* 345static struct nfs4_file_layout_dsaddr*
337decode_device(struct inode *ino, struct pnfs_device *pdev) 346decode_device(struct inode *ino, struct pnfs_device *pdev)
338{ 347{
339 int i, dummy; 348 int i;
340 u32 cnt, num; 349 u32 cnt, num;
341 u8 *indexp; 350 u8 *indexp;
342 __be32 *p = (__be32 *)pdev->area, *indicesp; 351 __be32 *p;
343 struct nfs4_file_layout_dsaddr *dsaddr; 352 u8 *stripe_indices;
353 u8 max_stripe_index;
354 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
355 struct xdr_stream stream;
356 struct xdr_buf buf = {
357 .pages = pdev->pages,
358 .page_len = pdev->pglen,
359 .buflen = pdev->pglen,
360 .len = pdev->pglen,
361 };
362 struct page *scratch;
363
364 /* set up xdr stream */
365 scratch = alloc_page(GFP_KERNEL);
366 if (!scratch)
367 goto out_err;
368
369 xdr_init_decode(&stream, &buf, NULL);
370 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
344 371
345 /* Get the stripe count (number of stripe index) */ 372 /* Get the stripe count (number of stripe index) */
346 cnt = be32_to_cpup(p++); 373 p = xdr_inline_decode(&stream, 4);
374 if (unlikely(!p))
375 goto out_err_free_scratch;
376
377 cnt = be32_to_cpup(p);
347 dprintk("%s stripe count %d\n", __func__, cnt); 378 dprintk("%s stripe count %d\n", __func__, cnt);
348 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 379 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
349 printk(KERN_WARNING "%s: stripe count %d greater than " 380 printk(KERN_WARNING "%s: stripe count %d greater than "
350 "supported maximum %d\n", __func__, 381 "supported maximum %d\n", __func__,
351 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 382 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
352 goto out_err; 383 goto out_err_free_scratch;
384 }
385
386 /* read stripe indices */
387 stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
388 if (!stripe_indices)
389 goto out_err_free_scratch;
390
391 p = xdr_inline_decode(&stream, cnt << 2);
392 if (unlikely(!p))
393 goto out_err_free_stripe_indices;
394
395 indexp = &stripe_indices[0];
396 max_stripe_index = 0;
397 for (i = 0; i < cnt; i++) {
398 *indexp = be32_to_cpup(p++);
399 max_stripe_index = max(max_stripe_index, *indexp);
400 indexp++;
353 } 401 }
354 402
355 /* Check the multipath list count */ 403 /* Check the multipath list count */
356 indicesp = p; 404 p = xdr_inline_decode(&stream, 4);
357 p += XDR_QUADLEN(cnt << 2); 405 if (unlikely(!p))
358 num = be32_to_cpup(p++); 406 goto out_err_free_stripe_indices;
407
408 num = be32_to_cpup(p);
359 dprintk("%s ds_num %u\n", __func__, num); 409 dprintk("%s ds_num %u\n", __func__, num);
360 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 410 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
361 printk(KERN_WARNING "%s: multipath count %d greater than " 411 printk(KERN_WARNING "%s: multipath count %d greater than "
362 "supported maximum %d\n", __func__, 412 "supported maximum %d\n", __func__,
363 num, NFS4_PNFS_MAX_MULTI_CNT); 413 num, NFS4_PNFS_MAX_MULTI_CNT);
364 goto out_err; 414 goto out_err_free_stripe_indices;
365 } 415 }
416
417 /* validate stripe indices are all < num */
418 if (max_stripe_index >= num) {
419 printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
420 __func__, max_stripe_index, num);
421 goto out_err_free_stripe_indices;
422 }
423
366 dsaddr = kzalloc(sizeof(*dsaddr) + 424 dsaddr = kzalloc(sizeof(*dsaddr) +
367 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
368 GFP_KERNEL); 426 GFP_KERNEL);
369 if (!dsaddr) 427 if (!dsaddr)
370 goto out_err; 428 goto out_err_free_stripe_indices;
371
372 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
373 if (!dsaddr->stripe_indices)
374 goto out_err_free;
375 429
376 dsaddr->stripe_count = cnt; 430 dsaddr->stripe_count = cnt;
431 dsaddr->stripe_indices = stripe_indices;
432 stripe_indices = NULL;
377 dsaddr->ds_num = num; 433 dsaddr->ds_num = num;
378 434
379 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); 435 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
380 436
381 /* Go back an read stripe indices */
382 p = indicesp;
383 indexp = &dsaddr->stripe_indices[0];
384 for (i = 0; i < dsaddr->stripe_count; i++) {
385 *indexp = be32_to_cpup(p++);
386 if (*indexp >= num)
387 goto out_err_free;
388 indexp++;
389 }
390 /* Skip already read multipath list count */
391 p++;
392
393 for (i = 0; i < dsaddr->ds_num; i++) { 437 for (i = 0; i < dsaddr->ds_num; i++) {
394 int j; 438 int j;
439 u32 mp_count;
440
441 p = xdr_inline_decode(&stream, 4);
442 if (unlikely(!p))
443 goto out_err_free_deviceid;
395 444
396 dummy = be32_to_cpup(p++); /* multipath count */ 445 mp_count = be32_to_cpup(p); /* multipath count */
397 if (dummy > 1) { 446 if (mp_count > 1) {
398 printk(KERN_WARNING 447 printk(KERN_WARNING
399 "%s: Multipath count %d not supported, " 448 "%s: Multipath count %d not supported, "
400 "skipping all greater than 1\n", __func__, 449 "skipping all greater than 1\n", __func__,
401 dummy); 450 mp_count);
402 } 451 }
403 for (j = 0; j < dummy; j++) { 452 for (j = 0; j < mp_count; j++) {
404 if (j == 0) { 453 if (j == 0) {
405 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); 454 dsaddr->ds_list[i] = decode_and_add_ds(&stream,
455 ino);
406 if (dsaddr->ds_list[i] == NULL) 456 if (dsaddr->ds_list[i] == NULL)
407 goto out_err_free; 457 goto out_err_free_deviceid;
408 } else { 458 } else {
409 u32 len; 459 u32 len;
410 /* skip extra multipath */ 460 /* skip extra multipath */
411 len = be32_to_cpup(p++); 461
412 p += XDR_QUADLEN(len); 462 /* read len, skip */
413 len = be32_to_cpup(p++); 463 p = xdr_inline_decode(&stream, 4);
414 p += XDR_QUADLEN(len); 464 if (unlikely(!p))
415 continue; 465 goto out_err_free_deviceid;
466 len = be32_to_cpup(p);
467
468 p = xdr_inline_decode(&stream, len);
469 if (unlikely(!p))
470 goto out_err_free_deviceid;
471
472 /* read len, skip */
473 p = xdr_inline_decode(&stream, 4);
474 if (unlikely(!p))
475 goto out_err_free_deviceid;
476 len = be32_to_cpup(p);
477
478 p = xdr_inline_decode(&stream, len);
479 if (unlikely(!p))
480 goto out_err_free_deviceid;
416 } 481 }
417 } 482 }
418 } 483 }
484
485 __free_page(scratch);
419 return dsaddr; 486 return dsaddr;
420 487
421out_err_free: 488out_err_free_deviceid:
422 nfs4_fl_free_deviceid(dsaddr); 489 nfs4_fl_free_deviceid(dsaddr);
490 /* stripe_indicies was part of dsaddr */
491 goto out_err_free_scratch;
492out_err_free_stripe_indices:
493 kfree(stripe_indices);
494out_err_free_scratch:
495 __free_page(scratch);
423out_err: 496out_err:
424 dprintk("%s ERROR: returning NULL\n", __func__); 497 dprintk("%s ERROR: returning NULL\n", __func__);
425 return NULL; 498 return NULL;
@@ -498,11 +571,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
498 goto out_free; 571 goto out_free;
499 } 572 }
500 573
501 /* set pdev->area */
502 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
503 if (!pdev->area)
504 goto out_free;
505
506 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); 574 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
507 pdev->layout_type = LAYOUT_NFSV4_1_FILES; 575 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
508 pdev->pages = pages; 576 pdev->pages = pages;
@@ -521,8 +589,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
521 */ 589 */
522 dsaddr = decode_and_add_device(inode, pdev); 590 dsaddr = decode_and_add_device(inode, pdev);
523out_free: 591out_free:
524 if (pdev->area != NULL)
525 vunmap(pdev->area);
526 for (i = 0; i < max_pages; i++) 592 for (i = 0; i < max_pages; i++)
527 __free_page(pages[i]); 593 __free_page(pages[i]);
528 kfree(pages); 594 kfree(pages);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1d84e7088af9..dfd1e6d7e6c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -41,6 +41,7 @@
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/sunrpc/clnt.h> 43#include <linux/sunrpc/clnt.h>
44#include <linux/sunrpc/gss_api.h>
44#include <linux/nfs.h> 45#include <linux/nfs.h>
45#include <linux/nfs4.h> 46#include <linux/nfs4.h>
46#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
@@ -71,7 +72,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data);
71static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 72static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
72static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 73static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
73static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 74static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
74static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 75static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir,
76 const struct qstr *name, struct nfs_fh *fhandle,
77 struct nfs_fattr *fattr);
75static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 78static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
76static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 79static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
77 struct nfs_fattr *fattr, struct iattr *sattr, 80 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -85,6 +88,8 @@ static int nfs4_map_errors(int err)
85 switch (err) { 88 switch (err) {
86 case -NFS4ERR_RESOURCE: 89 case -NFS4ERR_RESOURCE:
87 return -EREMOTEIO; 90 return -EREMOTEIO;
91 case -NFS4ERR_WRONGSEC:
92 return -EPERM;
88 case -NFS4ERR_BADOWNER: 93 case -NFS4ERR_BADOWNER:
89 case -NFS4ERR_BADNAME: 94 case -NFS4ERR_BADNAME:
90 return -EINVAL; 95 return -EINVAL;
@@ -657,7 +662,8 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = {
657 .rpc_call_done = nfs41_call_sync_done, 662 .rpc_call_done = nfs41_call_sync_done,
658}; 663};
659 664
660static int nfs4_call_sync_sequence(struct nfs_server *server, 665static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
666 struct nfs_server *server,
661 struct rpc_message *msg, 667 struct rpc_message *msg,
662 struct nfs4_sequence_args *args, 668 struct nfs4_sequence_args *args,
663 struct nfs4_sequence_res *res, 669 struct nfs4_sequence_res *res,
@@ -673,7 +679,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
673 .cache_reply = cache_reply, 679 .cache_reply = cache_reply,
674 }; 680 };
675 struct rpc_task_setup task_setup = { 681 struct rpc_task_setup task_setup = {
676 .rpc_client = server->client, 682 .rpc_client = clnt,
677 .rpc_message = msg, 683 .rpc_message = msg,
678 .callback_ops = &nfs41_call_sync_ops, 684 .callback_ops = &nfs41_call_sync_ops,
679 .callback_data = &data 685 .callback_data = &data
@@ -692,13 +698,14 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
692 return ret; 698 return ret;
693} 699}
694 700
695int _nfs4_call_sync_session(struct nfs_server *server, 701int _nfs4_call_sync_session(struct rpc_clnt *clnt,
702 struct nfs_server *server,
696 struct rpc_message *msg, 703 struct rpc_message *msg,
697 struct nfs4_sequence_args *args, 704 struct nfs4_sequence_args *args,
698 struct nfs4_sequence_res *res, 705 struct nfs4_sequence_res *res,
699 int cache_reply) 706 int cache_reply)
700{ 707{
701 return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0); 708 return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0);
702} 709}
703 710
704#else 711#else
@@ -709,19 +716,28 @@ static int nfs4_sequence_done(struct rpc_task *task,
709} 716}
710#endif /* CONFIG_NFS_V4_1 */ 717#endif /* CONFIG_NFS_V4_1 */
711 718
712int _nfs4_call_sync(struct nfs_server *server, 719int _nfs4_call_sync(struct rpc_clnt *clnt,
720 struct nfs_server *server,
713 struct rpc_message *msg, 721 struct rpc_message *msg,
714 struct nfs4_sequence_args *args, 722 struct nfs4_sequence_args *args,
715 struct nfs4_sequence_res *res, 723 struct nfs4_sequence_res *res,
716 int cache_reply) 724 int cache_reply)
717{ 725{
718 args->sa_session = res->sr_session = NULL; 726 args->sa_session = res->sr_session = NULL;
719 return rpc_call_sync(server->client, msg, 0); 727 return rpc_call_sync(clnt, msg, 0);
720} 728}
721 729
722#define nfs4_call_sync(server, msg, args, res, cache_reply) \ 730static inline
723 (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \ 731int nfs4_call_sync(struct rpc_clnt *clnt,
724 &(res)->seq_res, (cache_reply)) 732 struct nfs_server *server,
733 struct rpc_message *msg,
734 struct nfs4_sequence_args *args,
735 struct nfs4_sequence_res *res,
736 int cache_reply)
737{
738 return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
739 args, res, cache_reply);
740}
725 741
726static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 742static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
727{ 743{
@@ -1831,7 +1847,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1831 } else 1847 } else
1832 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1848 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
1833 1849
1834 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 1850 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
1835 if (status == 0 && state != NULL) 1851 if (status == 0 && state != NULL)
1836 renew_lease(server, timestamp); 1852 renew_lease(server, timestamp);
1837 return status; 1853 return status;
@@ -2090,7 +2106,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
2090 }; 2106 };
2091 int status; 2107 int status;
2092 2108
2093 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2109 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2094 if (status == 0) { 2110 if (status == 0) {
2095 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2111 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
2096 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| 2112 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
@@ -2160,7 +2176,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2160 }; 2176 };
2161 2177
2162 nfs_fattr_init(info->fattr); 2178 nfs_fattr_init(info->fattr);
2163 return nfs4_call_sync(server, &msg, &args, &res, 0); 2179 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2164} 2180}
2165 2181
2166static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 2182static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2176,15 +2192,43 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2176 return err; 2192 return err;
2177} 2193}
2178 2194
2195static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2196 struct nfs_fsinfo *info, rpc_authflavor_t flavor)
2197{
2198 struct rpc_auth *auth;
2199 int ret;
2200
2201 auth = rpcauth_create(flavor, server->client);
2202 if (!auth) {
2203 ret = -EIO;
2204 goto out;
2205 }
2206 ret = nfs4_lookup_root(server, fhandle, info);
2207 if (ret < 0)
2208 ret = -EAGAIN;
2209out:
2210 return ret;
2211}
2212
2179/* 2213/*
2180 * get the file handle for the "/" directory on the server 2214 * get the file handle for the "/" directory on the server
2181 */ 2215 */
2182static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2216static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2183 struct nfs_fsinfo *info) 2217 struct nfs_fsinfo *info)
2184{ 2218{
2185 int status; 2219 int i, len, status = 0;
2220 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2];
2221
2222 flav_array[0] = RPC_AUTH_UNIX;
2223 len = gss_mech_list_pseudoflavors(&flav_array[1]);
2224 flav_array[1+len] = RPC_AUTH_NULL;
2225 len += 2;
2186 2226
2187 status = nfs4_lookup_root(server, fhandle, info); 2227 for (i = 0; i < len; i++) {
2228 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
2229 if (status == 0)
2230 break;
2231 }
2188 if (status == 0) 2232 if (status == 0)
2189 status = nfs4_server_capabilities(server, fhandle); 2233 status = nfs4_server_capabilities(server, fhandle);
2190 if (status == 0) 2234 if (status == 0)
@@ -2249,7 +2293,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
2249 }; 2293 };
2250 2294
2251 nfs_fattr_init(fattr); 2295 nfs_fattr_init(fattr);
2252 return nfs4_call_sync(server, &msg, &args, &res, 0); 2296 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2253} 2297}
2254 2298
2255static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2299static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -2309,9 +2353,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2309 return status; 2353 return status;
2310} 2354}
2311 2355
2312static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh, 2356static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
2313 const struct qstr *name, struct nfs_fh *fhandle, 2357 const struct nfs_fh *dirfh, const struct qstr *name,
2314 struct nfs_fattr *fattr) 2358 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2315{ 2359{
2316 int status; 2360 int status;
2317 struct nfs4_lookup_arg args = { 2361 struct nfs4_lookup_arg args = {
@@ -2333,7 +2377,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d
2333 nfs_fattr_init(fattr); 2377 nfs_fattr_init(fattr);
2334 2378
2335 dprintk("NFS call lookupfh %s\n", name->name); 2379 dprintk("NFS call lookupfh %s\n", name->name);
2336 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2380 status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
2337 dprintk("NFS reply lookupfh: %d\n", status); 2381 dprintk("NFS reply lookupfh: %d\n", status);
2338 return status; 2382 return status;
2339} 2383}
@@ -2345,7 +2389,7 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2345 struct nfs4_exception exception = { }; 2389 struct nfs4_exception exception = { };
2346 int err; 2390 int err;
2347 do { 2391 do {
2348 err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); 2392 err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr);
2349 /* FIXME: !!!! */ 2393 /* FIXME: !!!! */
2350 if (err == -NFS4ERR_MOVED) { 2394 if (err == -NFS4ERR_MOVED) {
2351 err = -EREMOTE; 2395 err = -EREMOTE;
@@ -2356,27 +2400,41 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2356 return err; 2400 return err;
2357} 2401}
2358 2402
2359static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, 2403static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
2360 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2404 const struct qstr *name, struct nfs_fh *fhandle,
2405 struct nfs_fattr *fattr)
2361{ 2406{
2362 int status; 2407 int status;
2363 2408
2364 dprintk("NFS call lookup %s\n", name->name); 2409 dprintk("NFS call lookup %s\n", name->name);
2365 status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); 2410 status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
2366 if (status == -NFS4ERR_MOVED) 2411 if (status == -NFS4ERR_MOVED)
2367 status = nfs4_get_referral(dir, name, fattr, fhandle); 2412 status = nfs4_get_referral(dir, name, fattr, fhandle);
2368 dprintk("NFS reply lookup: %d\n", status); 2413 dprintk("NFS reply lookup: %d\n", status);
2369 return status; 2414 return status;
2370} 2415}
2371 2416
2372static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2417void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh)
2418{
2419 memset(fh, 0, sizeof(struct nfs_fh));
2420 fattr->fsid.major = 1;
2421 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
2422 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT;
2423 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
2424 fattr->nlink = 2;
2425}
2426
2427static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
2428 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2373{ 2429{
2374 struct nfs4_exception exception = { }; 2430 struct nfs4_exception exception = { };
2375 int err; 2431 int err;
2376 do { 2432 do {
2377 err = nfs4_handle_exception(NFS_SERVER(dir), 2433 err = nfs4_handle_exception(NFS_SERVER(dir),
2378 _nfs4_proc_lookup(dir, name, fhandle, fattr), 2434 _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr),
2379 &exception); 2435 &exception);
2436 if (err == -EPERM)
2437 nfs_fixup_secinfo_attributes(fattr, fhandle);
2380 } while (exception.retry); 2438 } while (exception.retry);
2381 return err; 2439 return err;
2382} 2440}
@@ -2421,7 +2479,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2421 if (res.fattr == NULL) 2479 if (res.fattr == NULL)
2422 return -ENOMEM; 2480 return -ENOMEM;
2423 2481
2424 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2482 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2425 if (!status) { 2483 if (!status) {
2426 entry->mask = 0; 2484 entry->mask = 0;
2427 if (res.access & NFS4_ACCESS_READ) 2485 if (res.access & NFS4_ACCESS_READ)
@@ -2488,7 +2546,7 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
2488 .rpc_resp = &res, 2546 .rpc_resp = &res,
2489 }; 2547 };
2490 2548
2491 return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 2549 return nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
2492} 2550}
2493 2551
2494static int nfs4_proc_readlink(struct inode *inode, struct page *page, 2552static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2577,7 +2635,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2577 if (res.dir_attr == NULL) 2635 if (res.dir_attr == NULL)
2578 goto out; 2636 goto out;
2579 2637
2580 status = nfs4_call_sync(server, &msg, &args, &res, 1); 2638 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2581 if (status == 0) { 2639 if (status == 0) {
2582 update_changeattr(dir, &res.cinfo); 2640 update_changeattr(dir, &res.cinfo);
2583 nfs_post_op_update_inode(dir, res.dir_attr); 2641 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2678,7 +2736,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2678 if (res.old_fattr == NULL || res.new_fattr == NULL) 2736 if (res.old_fattr == NULL || res.new_fattr == NULL)
2679 goto out; 2737 goto out;
2680 2738
2681 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2739 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2682 if (!status) { 2740 if (!status) {
2683 update_changeattr(old_dir, &res.old_cinfo); 2741 update_changeattr(old_dir, &res.old_cinfo);
2684 nfs_post_op_update_inode(old_dir, res.old_fattr); 2742 nfs_post_op_update_inode(old_dir, res.old_fattr);
@@ -2729,7 +2787,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2729 if (res.fattr == NULL || res.dir_attr == NULL) 2787 if (res.fattr == NULL || res.dir_attr == NULL)
2730 goto out; 2788 goto out;
2731 2789
2732 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2790 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2733 if (!status) { 2791 if (!status) {
2734 update_changeattr(dir, &res.cinfo); 2792 update_changeattr(dir, &res.cinfo);
2735 nfs_post_op_update_inode(dir, res.dir_attr); 2793 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2792,8 +2850,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
2792 2850
2793static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) 2851static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
2794{ 2852{
2795 int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, 2853 int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
2796 &data->arg, &data->res, 1); 2854 &data->arg.seq_args, &data->res.seq_res, 1);
2797 if (status == 0) { 2855 if (status == 0) {
2798 update_changeattr(dir, &data->res.dir_cinfo); 2856 update_changeattr(dir, &data->res.dir_cinfo);
2799 nfs_post_op_update_inode(dir, data->res.dir_fattr); 2857 nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2905,7 +2963,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2905 (unsigned long long)cookie); 2963 (unsigned long long)cookie);
2906 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 2964 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
2907 res.pgbase = args.pgbase; 2965 res.pgbase = args.pgbase;
2908 status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); 2966 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
2909 if (status >= 0) { 2967 if (status >= 0) {
2910 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 2968 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
2911 status += args.pgbase; 2969 status += args.pgbase;
@@ -2997,7 +3055,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
2997 }; 3055 };
2998 3056
2999 nfs_fattr_init(fsstat->fattr); 3057 nfs_fattr_init(fsstat->fattr);
3000 return nfs4_call_sync(server, &msg, &args, &res, 0); 3058 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3001} 3059}
3002 3060
3003static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) 3061static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -3028,7 +3086,7 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
3028 .rpc_resp = &res, 3086 .rpc_resp = &res,
3029 }; 3087 };
3030 3088
3031 return nfs4_call_sync(server, &msg, &args, &res, 0); 3089 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3032} 3090}
3033 3091
3034static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) 3092static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -3073,7 +3131,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
3073 } 3131 }
3074 3132
3075 nfs_fattr_init(pathconf->fattr); 3133 nfs_fattr_init(pathconf->fattr);
3076 return nfs4_call_sync(server, &msg, &args, &res, 0); 3134 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3077} 3135}
3078 3136
3079static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 3137static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -3195,12 +3253,9 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
3195 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 3253 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
3196} 3254}
3197 3255
3198static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3256static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3199{ 3257{
3200 struct inode *inode = data->inode; 3258 struct inode *inode = data->inode;
3201
3202 if (!nfs4_sequence_done(task, &data->res.seq_res))
3203 return -EAGAIN;
3204 3259
3205 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3260 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3206 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3261 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3210,11 +3265,24 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3210 return 0; 3265 return 0;
3211} 3266}
3212 3267
3268static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3269{
3270 if (!nfs4_sequence_done(task, &data->res.seq_res))
3271 return -EAGAIN;
3272 return data->write_done_cb(task, data);
3273}
3274
3213static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3275static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
3214{ 3276{
3215 struct nfs_server *server = NFS_SERVER(data->inode); 3277 struct nfs_server *server = NFS_SERVER(data->inode);
3216 3278
3217 data->args.bitmask = server->cache_consistency_bitmask; 3279 if (data->lseg) {
3280 data->args.bitmask = NULL;
3281 data->res.fattr = NULL;
3282 } else
3283 data->args.bitmask = server->cache_consistency_bitmask;
3284 if (!data->write_done_cb)
3285 data->write_done_cb = nfs4_commit_done_cb;
3218 data->res.server = server; 3286 data->res.server = server;
3219 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3287 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3220} 3288}
@@ -3452,7 +3520,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3452 resp_buf = buf; 3520 resp_buf = buf;
3453 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); 3521 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
3454 } 3522 }
3455 ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 3523 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
3456 if (ret) 3524 if (ret)
3457 goto out_free; 3525 goto out_free;
3458 if (res.acl_len > args.acl_len) 3526 if (res.acl_len > args.acl_len)
@@ -3527,7 +3595,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3527 if (i < 0) 3595 if (i < 0)
3528 return i; 3596 return i;
3529 nfs_inode_return_delegation(inode); 3597 nfs_inode_return_delegation(inode);
3530 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3598 ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3531 3599
3532 /* 3600 /*
3533 * Free each page after tx, so the only ref left is 3601 * Free each page after tx, so the only ref left is
@@ -3890,7 +3958,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3890 lsp = request->fl_u.nfs4_fl.owner; 3958 lsp = request->fl_u.nfs4_fl.owner;
3891 arg.lock_owner.id = lsp->ls_id.id; 3959 arg.lock_owner.id = lsp->ls_id.id;
3892 arg.lock_owner.s_dev = server->s_dev; 3960 arg.lock_owner.s_dev = server->s_dev;
3893 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 3961 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3894 switch (status) { 3962 switch (status) {
3895 case 0: 3963 case 0:
3896 request->fl_type = F_UNLCK; 3964 request->fl_type = F_UNLCK;
@@ -4618,12 +4686,46 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4618 nfs_fattr_init(&fs_locations->fattr); 4686 nfs_fattr_init(&fs_locations->fattr);
4619 fs_locations->server = server; 4687 fs_locations->server = server;
4620 fs_locations->nlocations = 0; 4688 fs_locations->nlocations = 0;
4621 status = nfs4_call_sync(server, &msg, &args, &res, 0); 4689 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
4622 nfs_fixup_referral_attributes(&fs_locations->fattr); 4690 nfs_fixup_referral_attributes(&fs_locations->fattr);
4623 dprintk("%s: returned status = %d\n", __func__, status); 4691 dprintk("%s: returned status = %d\n", __func__, status);
4624 return status; 4692 return status;
4625} 4693}
4626 4694
4695static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4696{
4697 int status;
4698 struct nfs4_secinfo_arg args = {
4699 .dir_fh = NFS_FH(dir),
4700 .name = name,
4701 };
4702 struct nfs4_secinfo_res res = {
4703 .flavors = flavors,
4704 };
4705 struct rpc_message msg = {
4706 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO],
4707 .rpc_argp = &args,
4708 .rpc_resp = &res,
4709 };
4710
4711 dprintk("NFS call secinfo %s\n", name->name);
4712 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
4713 dprintk("NFS reply secinfo: %d\n", status);
4714 return status;
4715}
4716
4717int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4718{
4719 struct nfs4_exception exception = { };
4720 int err;
4721 do {
4722 err = nfs4_handle_exception(NFS_SERVER(dir),
4723 _nfs4_proc_secinfo(dir, name, flavors),
4724 &exception);
4725 } while (exception.retry);
4726 return err;
4727}
4728
4627#ifdef CONFIG_NFS_V4_1 4729#ifdef CONFIG_NFS_V4_1
4628/* 4730/*
4629 * Check the exchange flags returned by the server for invalid flags, having 4731 * Check the exchange flags returned by the server for invalid flags, having
@@ -5516,8 +5618,6 @@ static void nfs4_layoutget_release(void *calldata)
5516 struct nfs4_layoutget *lgp = calldata; 5618 struct nfs4_layoutget *lgp = calldata;
5517 5619
5518 dprintk("--> %s\n", __func__); 5620 dprintk("--> %s\n", __func__);
5519 if (lgp->res.layout.buf != NULL)
5520 free_page((unsigned long) lgp->res.layout.buf);
5521 put_nfs_open_context(lgp->args.ctx); 5621 put_nfs_open_context(lgp->args.ctx);
5522 kfree(calldata); 5622 kfree(calldata);
5523 dprintk("<-- %s\n", __func__); 5623 dprintk("<-- %s\n", __func__);
@@ -5549,12 +5649,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5549 5649
5550 dprintk("--> %s\n", __func__); 5650 dprintk("--> %s\n", __func__);
5551 5651
5552 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); 5652 lgp->res.layoutp = &lgp->args.layout;
5553 if (lgp->res.layout.buf == NULL) {
5554 nfs4_layoutget_release(lgp);
5555 return -ENOMEM;
5556 }
5557
5558 lgp->res.seq_res.sr_slot = NULL; 5653 lgp->res.seq_res.sr_slot = NULL;
5559 task = rpc_run_task(&task_setup_data); 5654 task = rpc_run_task(&task_setup_data);
5560 if (IS_ERR(task)) 5655 if (IS_ERR(task))
@@ -5586,7 +5681,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5586 int status; 5681 int status;
5587 5682
5588 dprintk("--> %s\n", __func__); 5683 dprintk("--> %s\n", __func__);
5589 status = nfs4_call_sync(server, &msg, &args, &res, 0); 5684 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
5590 dprintk("<-- %s status=%d\n", __func__, status); 5685 dprintk("<-- %s status=%d\n", __func__, status);
5591 5686
5592 return status; 5687 return status;
@@ -5606,6 +5701,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5606} 5701}
5607EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); 5702EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5608 5703
5704static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
5705{
5706 struct nfs4_layoutcommit_data *data = calldata;
5707 struct nfs_server *server = NFS_SERVER(data->args.inode);
5708
5709 if (nfs4_setup_sequence(server, &data->args.seq_args,
5710 &data->res.seq_res, 1, task))
5711 return;
5712 rpc_call_start(task);
5713}
5714
5715static void
5716nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5717{
5718 struct nfs4_layoutcommit_data *data = calldata;
5719 struct nfs_server *server = NFS_SERVER(data->args.inode);
5720
5721 if (!nfs4_sequence_done(task, &data->res.seq_res))
5722 return;
5723
5724 switch (task->tk_status) { /* Just ignore these failures */
5725 case NFS4ERR_DELEG_REVOKED: /* layout was recalled */
5726 case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
5727 case NFS4ERR_BADLAYOUT: /* no layout */
5728 case NFS4ERR_GRACE: /* loca_recalim always false */
5729 task->tk_status = 0;
5730 }
5731
5732 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5733 nfs_restart_rpc(task, server->nfs_client);
5734 return;
5735 }
5736
5737 if (task->tk_status == 0)
5738 nfs_post_op_update_inode_force_wcc(data->args.inode,
5739 data->res.fattr);
5740}
5741
5742static void nfs4_layoutcommit_release(void *calldata)
5743{
5744 struct nfs4_layoutcommit_data *data = calldata;
5745
5746 /* Matched by references in pnfs_set_layoutcommit */
5747 put_lseg(data->lseg);
5748 put_rpccred(data->cred);
5749 kfree(data);
5750}
5751
5752static const struct rpc_call_ops nfs4_layoutcommit_ops = {
5753 .rpc_call_prepare = nfs4_layoutcommit_prepare,
5754 .rpc_call_done = nfs4_layoutcommit_done,
5755 .rpc_release = nfs4_layoutcommit_release,
5756};
5757
5758int
5759nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
5760{
5761 struct rpc_message msg = {
5762 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
5763 .rpc_argp = &data->args,
5764 .rpc_resp = &data->res,
5765 .rpc_cred = data->cred,
5766 };
5767 struct rpc_task_setup task_setup_data = {
5768 .task = &data->task,
5769 .rpc_client = NFS_CLIENT(data->args.inode),
5770 .rpc_message = &msg,
5771 .callback_ops = &nfs4_layoutcommit_ops,
5772 .callback_data = data,
5773 .flags = RPC_TASK_ASYNC,
5774 };
5775 struct rpc_task *task;
5776 int status = 0;
5777
5778 dprintk("NFS: %4d initiating layoutcommit call. sync %d "
5779 "lbw: %llu inode %lu\n",
5780 data->task.tk_pid, sync,
5781 data->args.lastbytewritten,
5782 data->args.inode->i_ino);
5783
5784 task = rpc_run_task(&task_setup_data);
5785 if (IS_ERR(task))
5786 return PTR_ERR(task);
5787 if (sync == false)
5788 goto out;
5789 status = nfs4_wait_for_completion_rpc_task(task);
5790 if (status != 0)
5791 goto out;
5792 status = task->tk_status;
5793out:
5794 dprintk("%s: status %d\n", __func__, status);
5795 rpc_put_task(task);
5796 return status;
5797}
5609#endif /* CONFIG_NFS_V4_1 */ 5798#endif /* CONFIG_NFS_V4_1 */
5610 5799
5611struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5800struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5741,6 +5930,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5741 .close_context = nfs4_close_context, 5930 .close_context = nfs4_close_context,
5742 .open_context = nfs4_atomic_open, 5931 .open_context = nfs4_atomic_open,
5743 .init_client = nfs4_init_client, 5932 .init_client = nfs4_init_client,
5933 .secinfo = nfs4_proc_secinfo,
5744}; 5934};
5745 5935
5746static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 5936static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ab1bf5bb021f..a6804f704d9d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -590,7 +590,8 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
590 state->owner = owner; 590 state->owner = owner;
591 atomic_inc(&owner->so_count); 591 atomic_inc(&owner->so_count);
592 list_add(&state->inode_states, &nfsi->open_states); 592 list_add(&state->inode_states, &nfsi->open_states);
593 state->inode = igrab(inode); 593 ihold(inode);
594 state->inode = inode;
594 spin_unlock(&inode->i_lock); 595 spin_unlock(&inode->i_lock);
595 /* Note: The reclaim code dictates that we add stateless 596 /* Note: The reclaim code dictates that we add stateless
596 * and read-only stateids to the end of the list */ 597 * and read-only stateids to the end of the list */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0cf560f77884..dddfb5795d7b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -46,6 +46,7 @@
46#include <linux/kdev_t.h> 46#include <linux/kdev_t.h>
47#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
48#include <linux/sunrpc/msg_prot.h> 48#include <linux/sunrpc/msg_prot.h>
49#include <linux/sunrpc/gss_api.h>
49#include <linux/nfs.h> 50#include <linux/nfs.h>
50#include <linux/nfs4.h> 51#include <linux/nfs4.h>
51#include <linux/nfs_fs.h> 52#include <linux/nfs_fs.h>
@@ -112,7 +113,7 @@ static int nfs4_stat_to_errno(int);
112#define encode_restorefh_maxsz (op_encode_hdr_maxsz) 113#define encode_restorefh_maxsz (op_encode_hdr_maxsz)
113#define decode_restorefh_maxsz (op_decode_hdr_maxsz) 114#define decode_restorefh_maxsz (op_decode_hdr_maxsz)
114#define encode_fsinfo_maxsz (encode_getattr_maxsz) 115#define encode_fsinfo_maxsz (encode_getattr_maxsz)
115#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) 116#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15)
116#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) 117#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
117#define decode_renew_maxsz (op_decode_hdr_maxsz) 118#define decode_renew_maxsz (op_decode_hdr_maxsz)
118#define encode_setclientid_maxsz \ 119#define encode_setclientid_maxsz \
@@ -253,6 +254,8 @@ static int nfs4_stat_to_errno(int);
253 (encode_getattr_maxsz) 254 (encode_getattr_maxsz)
254#define decode_fs_locations_maxsz \ 255#define decode_fs_locations_maxsz \
255 (0) 256 (0)
257#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
258#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
256 259
257#if defined(CONFIG_NFS_V4_1) 260#if defined(CONFIG_NFS_V4_1)
258#define NFS4_MAX_MACHINE_NAME_LEN (64) 261#define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -324,6 +327,18 @@ static int nfs4_stat_to_errno(int);
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ 327#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \ 328 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) 329 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
330#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
331 2 /* offset */ + \
332 2 /* length */ + \
333 1 /* reclaim */ + \
334 encode_stateid_maxsz + \
335 1 /* new offset (true) */ + \
336 2 /* last byte written */ + \
337 1 /* nt_timechanged (false) */ + \
338 1 /* layoutupdate4 layout type */ + \
339 1 /* NULL filelayout layoutupdate4 payload */)
340#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
341
327#else /* CONFIG_NFS_V4_1 */ 342#else /* CONFIG_NFS_V4_1 */
328#define encode_sequence_maxsz 0 343#define encode_sequence_maxsz 0
329#define decode_sequence_maxsz 0 344#define decode_sequence_maxsz 0
@@ -676,6 +691,14 @@ static int nfs4_stat_to_errno(int);
676 decode_putfh_maxsz + \ 691 decode_putfh_maxsz + \
677 decode_lookup_maxsz + \ 692 decode_lookup_maxsz + \
678 decode_fs_locations_maxsz) 693 decode_fs_locations_maxsz)
694#define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \
695 encode_sequence_maxsz + \
696 encode_putfh_maxsz + \
697 encode_secinfo_maxsz)
698#define NFS4_dec_secinfo_sz (compound_decode_hdr_maxsz + \
699 decode_sequence_maxsz + \
700 decode_putfh_maxsz + \
701 decode_secinfo_maxsz)
679#if defined(CONFIG_NFS_V4_1) 702#if defined(CONFIG_NFS_V4_1)
680#define NFS4_enc_exchange_id_sz \ 703#define NFS4_enc_exchange_id_sz \
681 (compound_encode_hdr_maxsz + \ 704 (compound_encode_hdr_maxsz + \
@@ -727,6 +750,17 @@ static int nfs4_stat_to_errno(int);
727 decode_sequence_maxsz + \ 750 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \ 751 decode_putfh_maxsz + \
729 decode_layoutget_maxsz) 752 decode_layoutget_maxsz)
753#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
754 encode_sequence_maxsz +\
755 encode_putfh_maxsz + \
756 encode_layoutcommit_maxsz + \
757 encode_getattr_maxsz)
758#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
759 decode_sequence_maxsz + \
760 decode_putfh_maxsz + \
761 decode_layoutcommit_maxsz + \
762 decode_getattr_maxsz)
763
730 764
731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 765const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
732 compound_encode_hdr_maxsz + 766 compound_encode_hdr_maxsz +
@@ -1620,6 +1654,18 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1620 hdr->replen += decode_delegreturn_maxsz; 1654 hdr->replen += decode_delegreturn_maxsz;
1621} 1655}
1622 1656
1657static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1658{
1659 int len = name->len;
1660 __be32 *p;
1661
1662 p = reserve_space(xdr, 8 + len);
1663 *p++ = cpu_to_be32(OP_SECINFO);
1664 xdr_encode_opaque(p, name->name, len);
1665 hdr->nops++;
1666 hdr->replen += decode_secinfo_maxsz;
1667}
1668
1623#if defined(CONFIG_NFS_V4_1) 1669#if defined(CONFIG_NFS_V4_1)
1624/* NFSv4.1 operations */ 1670/* NFSv4.1 operations */
1625static void encode_exchange_id(struct xdr_stream *xdr, 1671static void encode_exchange_id(struct xdr_stream *xdr,
@@ -1816,6 +1862,34 @@ encode_layoutget(struct xdr_stream *xdr,
1816 hdr->nops++; 1862 hdr->nops++;
1817 hdr->replen += decode_layoutget_maxsz; 1863 hdr->replen += decode_layoutget_maxsz;
1818} 1864}
1865
1866static int
1867encode_layoutcommit(struct xdr_stream *xdr,
1868 const struct nfs4_layoutcommit_args *args,
1869 struct compound_hdr *hdr)
1870{
1871 __be32 *p;
1872
1873 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
1874 NFS_SERVER(args->inode)->pnfs_curr_ld->id);
1875
1876 p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
1877 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
1878 /* Only whole file layouts */
1879 p = xdr_encode_hyper(p, 0); /* offset */
1880 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */
1881 *p++ = cpu_to_be32(0); /* reclaim */
1882 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
1883 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
1884 p = xdr_encode_hyper(p, args->lastbytewritten);
1885 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
1886 *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
1887 *p++ = cpu_to_be32(0); /* no file layout payload */
1888
1889 hdr->nops++;
1890 hdr->replen += decode_layoutcommit_maxsz;
1891 return 0;
1892}
1819#endif /* CONFIG_NFS_V4_1 */ 1893#endif /* CONFIG_NFS_V4_1 */
1820 1894
1821/* 1895/*
@@ -2294,7 +2368,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2294 encode_sequence(xdr, &args->seq_args, &hdr); 2368 encode_sequence(xdr, &args->seq_args, &hdr);
2295 encode_putfh(xdr, args->fh, &hdr); 2369 encode_putfh(xdr, args->fh, &hdr);
2296 encode_commit(xdr, args, &hdr); 2370 encode_commit(xdr, args, &hdr);
2297 encode_getfattr(xdr, args->bitmask, &hdr); 2371 if (args->bitmask)
2372 encode_getfattr(xdr, args->bitmask, &hdr);
2298 encode_nops(&hdr); 2373 encode_nops(&hdr);
2299} 2374}
2300 2375
@@ -2465,6 +2540,24 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
2465 encode_nops(&hdr); 2540 encode_nops(&hdr);
2466} 2541}
2467 2542
2543/*
2544 * Encode SECINFO request
2545 */
2546static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
2547 struct xdr_stream *xdr,
2548 struct nfs4_secinfo_arg *args)
2549{
2550 struct compound_hdr hdr = {
2551 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2552 };
2553
2554 encode_compound_hdr(xdr, req, &hdr);
2555 encode_sequence(xdr, &args->seq_args, &hdr);
2556 encode_putfh(xdr, args->dir_fh, &hdr);
2557 encode_secinfo(xdr, args->name, &hdr);
2558 encode_nops(&hdr);
2559}
2560
2468#if defined(CONFIG_NFS_V4_1) 2561#if defined(CONFIG_NFS_V4_1)
2469/* 2562/*
2470 * EXCHANGE_ID request 2563 * EXCHANGE_ID request
@@ -2604,8 +2697,32 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
2604 encode_sequence(xdr, &args->seq_args, &hdr); 2697 encode_sequence(xdr, &args->seq_args, &hdr);
2605 encode_putfh(xdr, NFS_FH(args->inode), &hdr); 2698 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2606 encode_layoutget(xdr, args, &hdr); 2699 encode_layoutget(xdr, args, &hdr);
2700
2701 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
2702 args->layout.pages, 0, args->layout.pglen);
2703
2607 encode_nops(&hdr); 2704 encode_nops(&hdr);
2608} 2705}
2706
2707/*
2708 * Encode LAYOUTCOMMIT request
2709 */
2710static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
2711 struct xdr_stream *xdr,
2712 struct nfs4_layoutcommit_args *args)
2713{
2714 struct compound_hdr hdr = {
2715 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2716 };
2717
2718 encode_compound_hdr(xdr, req, &hdr);
2719 encode_sequence(xdr, &args->seq_args, &hdr);
2720 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2721 encode_layoutcommit(xdr, args, &hdr);
2722 encode_getfattr(xdr, args->bitmask, &hdr);
2723 encode_nops(&hdr);
2724 return 0;
2725}
2609#endif /* CONFIG_NFS_V4_1 */ 2726#endif /* CONFIG_NFS_V4_1 */
2610 2727
2611static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2728static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -2925,6 +3042,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2925 if (unlikely(!p)) 3042 if (unlikely(!p))
2926 goto out_overflow; 3043 goto out_overflow;
2927 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 3044 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3045 return -be32_to_cpup(p);
2928 } 3046 }
2929 return 0; 3047 return 0;
2930out_overflow: 3048out_overflow:
@@ -3912,6 +4030,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3912 fattr->valid |= status; 4030 fattr->valid |= status;
3913 4031
3914 status = decode_attr_error(xdr, bitmap); 4032 status = decode_attr_error(xdr, bitmap);
4033 if (status == -NFS4ERR_WRONGSEC) {
4034 nfs_fixup_secinfo_attributes(fattr, fh);
4035 status = 0;
4036 }
3915 if (status < 0) 4037 if (status < 0)
3916 goto xdr_error; 4038 goto xdr_error;
3917 4039
@@ -4680,6 +4802,73 @@ static int decode_delegreturn(struct xdr_stream *xdr)
4680 return decode_op_hdr(xdr, OP_DELEGRETURN); 4802 return decode_op_hdr(xdr, OP_DELEGRETURN);
4681} 4803}
4682 4804
4805static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor)
4806{
4807 __be32 *p;
4808
4809 p = xdr_inline_decode(xdr, 4);
4810 if (unlikely(!p))
4811 goto out_overflow;
4812 flavor->gss.sec_oid4.len = be32_to_cpup(p);
4813 if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN)
4814 goto out_err;
4815
4816 p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len);
4817 if (unlikely(!p))
4818 goto out_overflow;
4819 memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len);
4820
4821 p = xdr_inline_decode(xdr, 8);
4822 if (unlikely(!p))
4823 goto out_overflow;
4824 flavor->gss.qop4 = be32_to_cpup(p++);
4825 flavor->gss.service = be32_to_cpup(p);
4826
4827 return 0;
4828
4829out_overflow:
4830 print_overflow_msg(__func__, xdr);
4831 return -EIO;
4832out_err:
4833 return -EINVAL;
4834}
4835
4836static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4837{
4838 struct nfs4_secinfo_flavor *sec_flavor;
4839 int status;
4840 __be32 *p;
4841 int i;
4842
4843 status = decode_op_hdr(xdr, OP_SECINFO);
4844 p = xdr_inline_decode(xdr, 4);
4845 if (unlikely(!p))
4846 goto out_overflow;
4847 res->flavors->num_flavors = be32_to_cpup(p);
4848
4849 for (i = 0; i < res->flavors->num_flavors; i++) {
4850 sec_flavor = &res->flavors->flavors[i];
4851 if ((char *)&sec_flavor[1] - (char *)res > PAGE_SIZE)
4852 break;
4853
4854 p = xdr_inline_decode(xdr, 4);
4855 if (unlikely(!p))
4856 goto out_overflow;
4857 sec_flavor->flavor = be32_to_cpup(p);
4858
4859 if (sec_flavor->flavor == RPC_AUTH_GSS) {
4860 if (decode_secinfo_gss(xdr, sec_flavor))
4861 break;
4862 }
4863 }
4864
4865 return 0;
4866
4867out_overflow:
4868 print_overflow_msg(__func__, xdr);
4869 return -EIO;
4870}
4871
4683#if defined(CONFIG_NFS_V4_1) 4872#if defined(CONFIG_NFS_V4_1)
4684static int decode_exchange_id(struct xdr_stream *xdr, 4873static int decode_exchange_id(struct xdr_stream *xdr,
4685 struct nfs41_exchange_id_res *res) 4874 struct nfs41_exchange_id_res *res)
@@ -4950,6 +5139,9 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4950 __be32 *p; 5139 __be32 *p;
4951 int status; 5140 int status;
4952 u32 layout_count; 5141 u32 layout_count;
5142 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
5143 struct kvec *iov = rcvbuf->head;
5144 u32 hdrlen, recvd;
4953 5145
4954 status = decode_op_hdr(xdr, OP_LAYOUTGET); 5146 status = decode_op_hdr(xdr, OP_LAYOUTGET);
4955 if (status) 5147 if (status)
@@ -4966,17 +5158,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4966 return -EINVAL; 5158 return -EINVAL;
4967 } 5159 }
4968 5160
4969 p = xdr_inline_decode(xdr, 24); 5161 p = xdr_inline_decode(xdr, 28);
4970 if (unlikely(!p)) 5162 if (unlikely(!p))
4971 goto out_overflow; 5163 goto out_overflow;
4972 p = xdr_decode_hyper(p, &res->range.offset); 5164 p = xdr_decode_hyper(p, &res->range.offset);
4973 p = xdr_decode_hyper(p, &res->range.length); 5165 p = xdr_decode_hyper(p, &res->range.length);
4974 res->range.iomode = be32_to_cpup(p++); 5166 res->range.iomode = be32_to_cpup(p++);
4975 res->type = be32_to_cpup(p++); 5167 res->type = be32_to_cpup(p++);
4976 5168 res->layoutp->len = be32_to_cpup(p);
4977 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
4978 if (unlikely(status))
4979 return status;
4980 5169
4981 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", 5170 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
4982 __func__, 5171 __func__,
@@ -4984,12 +5173,18 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4984 (unsigned long)res->range.length, 5173 (unsigned long)res->range.length,
4985 res->range.iomode, 5174 res->range.iomode,
4986 res->type, 5175 res->type,
4987 res->layout.len); 5176 res->layoutp->len);
4988 5177
4989 /* nfs4_proc_layoutget allocated a single page */ 5178 hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
4990 if (res->layout.len > PAGE_SIZE) 5179 recvd = req->rq_rcv_buf.len - hdrlen;
4991 return -ENOMEM; 5180 if (res->layoutp->len > recvd) {
4992 memcpy(res->layout.buf, p, res->layout.len); 5181 dprintk("NFS: server cheating in layoutget reply: "
5182 "layout len %u > recvd %u\n",
5183 res->layoutp->len, recvd);
5184 return -EINVAL;
5185 }
5186
5187 xdr_read_pages(xdr, res->layoutp->len);
4993 5188
4994 if (layout_count > 1) { 5189 if (layout_count > 1) {
4995 /* We only handle a length one array at the moment. Any 5190 /* We only handle a length one array at the moment. Any
@@ -5006,6 +5201,35 @@ out_overflow:
5006 print_overflow_msg(__func__, xdr); 5201 print_overflow_msg(__func__, xdr);
5007 return -EIO; 5202 return -EIO;
5008} 5203}
5204
5205static int decode_layoutcommit(struct xdr_stream *xdr,
5206 struct rpc_rqst *req,
5207 struct nfs4_layoutcommit_res *res)
5208{
5209 __be32 *p;
5210 __u32 sizechanged;
5211 int status;
5212
5213 status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
5214 if (status)
5215 return status;
5216
5217 p = xdr_inline_decode(xdr, 4);
5218 if (unlikely(!p))
5219 goto out_overflow;
5220 sizechanged = be32_to_cpup(p);
5221
5222 if (sizechanged) {
5223 /* throw away new size */
5224 p = xdr_inline_decode(xdr, 8);
5225 if (unlikely(!p))
5226 goto out_overflow;
5227 }
5228 return 0;
5229out_overflow:
5230 print_overflow_msg(__func__, xdr);
5231 return -EIO;
5232}
5009#endif /* CONFIG_NFS_V4_1 */ 5233#endif /* CONFIG_NFS_V4_1 */
5010 5234
5011/* 5235/*
@@ -5723,8 +5947,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5723 status = decode_commit(xdr, res); 5947 status = decode_commit(xdr, res);
5724 if (status) 5948 if (status)
5725 goto out; 5949 goto out;
5726 decode_getfattr(xdr, res->fattr, res->server, 5950 if (res->fattr)
5727 !RPC_IS_ASYNC(rqstp->rq_task)); 5951 decode_getfattr(xdr, res->fattr, res->server,
5952 !RPC_IS_ASYNC(rqstp->rq_task));
5728out: 5953out:
5729 return status; 5954 return status;
5730} 5955}
@@ -5919,6 +6144,32 @@ out:
5919 return status; 6144 return status;
5920} 6145}
5921 6146
6147/*
6148 * Decode SECINFO response
6149 */
6150static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
6151 struct xdr_stream *xdr,
6152 struct nfs4_secinfo_res *res)
6153{
6154 struct compound_hdr hdr;
6155 int status;
6156
6157 status = decode_compound_hdr(xdr, &hdr);
6158 if (status)
6159 goto out;
6160 status = decode_sequence(xdr, &res->seq_res, rqstp);
6161 if (status)
6162 goto out;
6163 status = decode_putfh(xdr);
6164 if (status)
6165 goto out;
6166 status = decode_secinfo(xdr, res);
6167 if (status)
6168 goto out;
6169out:
6170 return status;
6171}
6172
5922#if defined(CONFIG_NFS_V4_1) 6173#if defined(CONFIG_NFS_V4_1)
5923/* 6174/*
5924 * Decode EXCHANGE_ID response 6175 * Decode EXCHANGE_ID response
@@ -6066,6 +6317,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
6066out: 6317out:
6067 return status; 6318 return status;
6068} 6319}
6320
6321/*
6322 * Decode LAYOUTCOMMIT response
6323 */
6324static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6325 struct xdr_stream *xdr,
6326 struct nfs4_layoutcommit_res *res)
6327{
6328 struct compound_hdr hdr;
6329 int status;
6330
6331 status = decode_compound_hdr(xdr, &hdr);
6332 if (status)
6333 goto out;
6334 status = decode_sequence(xdr, &res->seq_res, rqstp);
6335 if (status)
6336 goto out;
6337 status = decode_putfh(xdr);
6338 if (status)
6339 goto out;
6340 status = decode_layoutcommit(xdr, rqstp, res);
6341 if (status)
6342 goto out;
6343 decode_getfattr(xdr, res->fattr, res->server,
6344 !RPC_IS_ASYNC(rqstp->rq_task));
6345out:
6346 return status;
6347}
6069#endif /* CONFIG_NFS_V4_1 */ 6348#endif /* CONFIG_NFS_V4_1 */
6070 6349
6071/** 6350/**
@@ -6180,10 +6459,6 @@ static struct {
6180 { NFS4ERR_SYMLINK, -ELOOP }, 6459 { NFS4ERR_SYMLINK, -ELOOP },
6181 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, 6460 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
6182 { NFS4ERR_DEADLOCK, -EDEADLK }, 6461 { NFS4ERR_DEADLOCK, -EDEADLK },
6183 { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs
6184 * to be handled by a
6185 * middle-layer.
6186 */
6187 { -1, -EIO } 6462 { -1, -EIO }
6188}; 6463};
6189 6464
@@ -6258,6 +6533,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6258 PROC(SETACL, enc_setacl, dec_setacl), 6533 PROC(SETACL, enc_setacl, dec_setacl),
6259 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 6534 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
6260 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), 6535 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
6536 PROC(SECINFO, enc_secinfo, dec_secinfo),
6261#if defined(CONFIG_NFS_V4_1) 6537#if defined(CONFIG_NFS_V4_1)
6262 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 6538 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
6263 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 6539 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
@@ -6267,6 +6543,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6267 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6543 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6268 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 6544 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6269 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6545 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6546 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
6270#endif /* CONFIG_NFS_V4_1 */ 6547#endif /* CONFIG_NFS_V4_1 */
6271}; 6548};
6272 6549
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 23e794410669..c80add6e2213 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -135,14 +135,14 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
135 nfs_unlock_request(req); 135 nfs_unlock_request(req);
136} 136}
137 137
138/** 138/*
139 * nfs_clear_request - Free up all resources allocated to the request 139 * nfs_clear_request - Free up all resources allocated to the request
140 * @req: 140 * @req:
141 * 141 *
142 * Release page and open context resources associated with a read/write 142 * Release page and open context resources associated with a read/write
143 * request after it has completed. 143 * request after it has completed.
144 */ 144 */
145void nfs_clear_request(struct nfs_page *req) 145static void nfs_clear_request(struct nfs_page *req)
146{ 146{
147 struct page *page = req->wb_page; 147 struct page *page = req->wb_page;
148 struct nfs_open_context *ctx = req->wb_context; 148 struct nfs_open_context *ctx = req->wb_context;
@@ -223,6 +223,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
223 desc->pg_count = 0; 223 desc->pg_count = 0;
224 desc->pg_bsize = bsize; 224 desc->pg_bsize = bsize;
225 desc->pg_base = 0; 225 desc->pg_base = 0;
226 desc->pg_moreio = 0;
226 desc->pg_inode = inode; 227 desc->pg_inode = inode;
227 desc->pg_doio = doio; 228 desc->pg_doio = doio;
228 desc->pg_ioflags = io_flags; 229 desc->pg_ioflags = io_flags;
@@ -335,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
335 struct nfs_page *req) 336 struct nfs_page *req)
336{ 337{
337 while (!nfs_pageio_do_add_request(desc, req)) { 338 while (!nfs_pageio_do_add_request(desc, req)) {
339 desc->pg_moreio = 1;
338 nfs_pageio_doio(desc); 340 nfs_pageio_doio(desc);
339 if (desc->pg_error < 0) 341 if (desc->pg_error < 0)
340 return 0; 342 return 0;
343 desc->pg_moreio = 0;
341 } 344 }
342 return 1; 345 return 1;
343} 346}
@@ -395,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi,
395 pgoff_t idx_end; 398 pgoff_t idx_end;
396 int found, i; 399 int found, i;
397 int res; 400 int res;
401 struct list_head *list;
398 402
399 res = 0; 403 res = 0;
400 if (npages == 0) 404 if (npages == 0)
@@ -415,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi,
415 idx_start = req->wb_index + 1; 419 idx_start = req->wb_index + 1;
416 if (nfs_set_page_tag_locked(req)) { 420 if (nfs_set_page_tag_locked(req)) {
417 kref_get(&req->wb_kref); 421 kref_get(&req->wb_kref);
418 nfs_list_remove_request(req);
419 radix_tree_tag_clear(&nfsi->nfs_page_tree, 422 radix_tree_tag_clear(&nfsi->nfs_page_tree,
420 req->wb_index, tag); 423 req->wb_index, tag);
421 nfs_list_add_request(req, dst); 424 list = pnfs_choose_commit_list(req, dst);
425 nfs_list_add_request(req, list);
422 res++; 426 res++;
423 if (res == INT_MAX) 427 if (res == INT_MAX)
424 goto out; 428 goto out;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f38813a0a295..d9ab97269ce6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -259,6 +259,7 @@ put_lseg(struct pnfs_layout_segment *lseg)
259 pnfs_free_lseg_list(&free_me); 259 pnfs_free_lseg_list(&free_me);
260 } 260 }
261} 261}
262EXPORT_SYMBOL_GPL(put_lseg);
262 263
263static bool 264static bool
264should_free_lseg(u32 lseg_iomode, u32 recall_iomode) 265should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
@@ -471,6 +472,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
471 struct nfs_server *server = NFS_SERVER(ino); 472 struct nfs_server *server = NFS_SERVER(ino);
472 struct nfs4_layoutget *lgp; 473 struct nfs4_layoutget *lgp;
473 struct pnfs_layout_segment *lseg = NULL; 474 struct pnfs_layout_segment *lseg = NULL;
475 struct page **pages = NULL;
476 int i;
477 u32 max_resp_sz, max_pages;
474 478
475 dprintk("--> %s\n", __func__); 479 dprintk("--> %s\n", __func__);
476 480
@@ -478,6 +482,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
478 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 482 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
479 if (lgp == NULL) 483 if (lgp == NULL)
480 return NULL; 484 return NULL;
485
486 /* allocate pages for xdr post processing */
487 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
488 max_pages = max_resp_sz >> PAGE_SHIFT;
489
490 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
491 if (!pages)
492 goto out_err_free;
493
494 for (i = 0; i < max_pages; i++) {
495 pages[i] = alloc_page(GFP_KERNEL);
496 if (!pages[i])
497 goto out_err_free;
498 }
499
481 lgp->args.minlength = NFS4_MAX_UINT64; 500 lgp->args.minlength = NFS4_MAX_UINT64;
482 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 501 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
483 lgp->args.range.iomode = iomode; 502 lgp->args.range.iomode = iomode;
@@ -486,6 +505,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
486 lgp->args.type = server->pnfs_curr_ld->id; 505 lgp->args.type = server->pnfs_curr_ld->id;
487 lgp->args.inode = ino; 506 lgp->args.inode = ino;
488 lgp->args.ctx = get_nfs_open_context(ctx); 507 lgp->args.ctx = get_nfs_open_context(ctx);
508 lgp->args.layout.pages = pages;
509 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
489 lgp->lsegpp = &lseg; 510 lgp->lsegpp = &lseg;
490 511
491 /* Synchronously retrieve layout information from server and 512 /* Synchronously retrieve layout information from server and
@@ -496,7 +517,26 @@ send_layoutget(struct pnfs_layout_hdr *lo,
496 /* remember that LAYOUTGET failed and suspend trying */ 517 /* remember that LAYOUTGET failed and suspend trying */
497 set_bit(lo_fail_bit(iomode), &lo->plh_flags); 518 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
498 } 519 }
520
521 /* free xdr pages */
522 for (i = 0; i < max_pages; i++)
523 __free_page(pages[i]);
524 kfree(pages);
525
499 return lseg; 526 return lseg;
527
528out_err_free:
529 /* free any allocated xdr pages, lgp as it's not used */
530 if (pages) {
531 for (i = 0; i < max_pages; i++) {
532 if (!pages[i])
533 break;
534 __free_page(pages[i]);
535 }
536 kfree(pages);
537 }
538 kfree(lgp);
539 return NULL;
500} 540}
501 541
502bool pnfs_roc(struct inode *ino) 542bool pnfs_roc(struct inode *ino)
@@ -945,3 +985,105 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
945 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 985 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
946 return trypnfs; 986 return trypnfs;
947} 987}
988
989/*
990 * Currently there is only one (whole file) write lseg.
991 */
992static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
993{
994 struct pnfs_layout_segment *lseg, *rv = NULL;
995
996 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
997 if (lseg->pls_range.iomode == IOMODE_RW)
998 rv = lseg;
999 return rv;
1000}
1001
1002void
1003pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1004{
1005 struct nfs_inode *nfsi = NFS_I(wdata->inode);
1006 loff_t end_pos = wdata->args.offset + wdata->res.count;
1007
1008 spin_lock(&nfsi->vfs_inode.i_lock);
1009 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1010 /* references matched in nfs4_layoutcommit_release */
1011 get_lseg(wdata->lseg);
1012 wdata->lseg->pls_lc_cred =
1013 get_rpccred(wdata->args.context->state->owner->so_cred);
1014 mark_inode_dirty_sync(wdata->inode);
1015 dprintk("%s: Set layoutcommit for inode %lu ",
1016 __func__, wdata->inode->i_ino);
1017 }
1018 if (end_pos > wdata->lseg->pls_end_pos)
1019 wdata->lseg->pls_end_pos = end_pos;
1020 spin_unlock(&nfsi->vfs_inode.i_lock);
1021}
1022EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1023
1024/*
1025 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
1026 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
1027 * data to disk to allow the server to recover the data if it crashes.
1028 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
1029 * is off, and a COMMIT is sent to a data server, or
1030 * if WRITEs to a data server return NFS_DATA_SYNC.
1031 */
1032int
1033pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1034{
1035 struct nfs4_layoutcommit_data *data;
1036 struct nfs_inode *nfsi = NFS_I(inode);
1037 struct pnfs_layout_segment *lseg;
1038 struct rpc_cred *cred;
1039 loff_t end_pos;
1040 int status = 0;
1041
1042 dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
1043
1044 if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1045 return 0;
1046
1047 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1048 data = kzalloc(sizeof(*data), GFP_NOFS);
1049 if (!data) {
1050 mark_inode_dirty_sync(inode);
1051 status = -ENOMEM;
1052 goto out;
1053 }
1054
1055 spin_lock(&inode->i_lock);
1056 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1057 spin_unlock(&inode->i_lock);
1058 kfree(data);
1059 goto out;
1060 }
1061 /*
1062 * Currently only one (whole file) write lseg which is referenced
1063 * in pnfs_set_layoutcommit and will be found.
1064 */
1065 lseg = pnfs_list_write_lseg(inode);
1066
1067 end_pos = lseg->pls_end_pos;
1068 cred = lseg->pls_lc_cred;
1069 lseg->pls_end_pos = 0;
1070 lseg->pls_lc_cred = NULL;
1071
1072 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
1073 sizeof(nfsi->layout->plh_stateid.data));
1074 spin_unlock(&inode->i_lock);
1075
1076 data->args.inode = inode;
1077 data->lseg = lseg;
1078 data->cred = cred;
1079 nfs_fattr_init(&data->fattr);
1080 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1081 data->res.fattr = &data->fattr;
1082 data->args.lastbytewritten = end_pos - 1;
1083 data->res.server = NFS_SERVER(inode);
1084
1085 status = nfs4_proc_layoutcommit(data, sync);
1086out:
1087 dprintk("<-- %s status %d\n", __func__, status);
1088 return status;
1089}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 6380b9405bcd..bc4827202e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -43,6 +43,8 @@ struct pnfs_layout_segment {
43 atomic_t pls_refcount; 43 atomic_t pls_refcount;
44 unsigned long pls_flags; 44 unsigned long pls_flags;
45 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
46 struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */
47 loff_t pls_end_pos; /* LAYOUTCOMMIT write end */
46}; 48};
47 49
48enum pnfs_try_status { 50enum pnfs_try_status {
@@ -74,6 +76,13 @@ struct pnfs_layoutdriver_type {
74 /* test for nfs page cache coalescing */ 76 /* test for nfs page cache coalescing */
75 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 77 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
76 78
79 /* Returns true if layoutdriver wants to divert this request to
80 * driver's commit routine.
81 */
82 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
83 struct list_head * (*choose_commit_list) (struct nfs_page *req);
84 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
85
77 /* 86 /*
78 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 87 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
79 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 88 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -100,7 +109,6 @@ struct pnfs_device {
100 unsigned int layout_type; 109 unsigned int layout_type;
101 unsigned int mincount; 110 unsigned int mincount;
102 struct page **pages; 111 struct page **pages;
103 void *area;
104 unsigned int pgbase; 112 unsigned int pgbase;
105 unsigned int pglen; 113 unsigned int pglen;
106}; 114};
@@ -145,7 +153,8 @@ bool pnfs_roc(struct inode *ino);
145void pnfs_roc_release(struct inode *ino); 153void pnfs_roc_release(struct inode *ino);
146void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 154void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
147bool pnfs_roc_drain(struct inode *ino, u32 *barrier); 155bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
148 156void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
157int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
149 158
150static inline int lo_fail_bit(u32 iomode) 159static inline int lo_fail_bit(u32 iomode)
151{ 160{
@@ -169,6 +178,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
169 return nfss->pnfs_curr_ld != NULL; 178 return nfss->pnfs_curr_ld != NULL;
170} 179}
171 180
181static inline void
182pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
183{
184 if (lseg) {
185 struct pnfs_layoutdriver_type *ld;
186
187 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
188 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
189 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
190 req->wb_commit_lseg = get_lseg(lseg);
191 }
192 }
193}
194
195static inline int
196pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
197{
198 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
199 return PNFS_NOT_ATTEMPTED;
200 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
201}
202
203static inline struct list_head *
204pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
205{
206 struct list_head *rv;
207
208 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) {
209 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode;
210
211 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
212 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
213 /* matched by ref taken when PG_PNFS_COMMIT is set */
214 put_lseg(req->wb_commit_lseg);
215 } else
216 rv = mds;
217 return rv;
218}
219
220static inline void pnfs_clear_request_commit(struct nfs_page *req)
221{
222 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags))
223 put_lseg(req->wb_commit_lseg);
224}
225
172#else /* CONFIG_NFS_V4_1 */ 226#else /* CONFIG_NFS_V4_1 */
173 227
174static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 228static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -252,6 +306,31 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
252 pgio->pg_test = NULL; 306 pgio->pg_test = NULL;
253} 307}
254 308
309static inline void
310pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
311{
312}
313
314static inline int
315pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
316{
317 return PNFS_NOT_ATTEMPTED;
318}
319
320static inline struct list_head *
321pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
322{
323 return mds;
324}
325
326static inline void pnfs_clear_request_commit(struct nfs_page *req)
327{
328}
329
330static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
331{
332 return 0;
333}
255#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
256 335
257#endif /* FS_NFS_PNFS_H */ 336#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b8ec170f2a0f..ac40b8535d7e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -177,7 +177,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
177} 177}
178 178
179static int 179static int
180nfs_proc_lookup(struct inode *dir, struct qstr *name, 180nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
181 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 181 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
182{ 182{
183 struct nfs_diropargs arg = { 183 struct nfs_diropargs arg = {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 47a3ad63e0d5..af0c6279a4a7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -59,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
59 } 59 }
60 return p; 60 return p;
61} 61}
62EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
62 63
63void nfs_commit_free(struct nfs_write_data *p) 64void nfs_commit_free(struct nfs_write_data *p)
64{ 65{
@@ -66,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p)
66 kfree(p->pagevec); 67 kfree(p->pagevec);
67 mempool_free(p, nfs_commit_mempool); 68 mempool_free(p, nfs_commit_mempool);
68} 69}
70EXPORT_SYMBOL_GPL(nfs_commit_free);
69 71
70struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 72struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
71{ 73{
@@ -179,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc)
179 if (wbc->for_reclaim) 181 if (wbc->for_reclaim)
180 return FLUSH_HIGHPRI | FLUSH_STABLE; 182 return FLUSH_HIGHPRI | FLUSH_STABLE;
181 if (wbc->for_kupdate || wbc->for_background) 183 if (wbc->for_kupdate || wbc->for_background)
182 return FLUSH_LOWPRI; 184 return FLUSH_LOWPRI | FLUSH_COND_STABLE;
183 return 0; 185 return FLUSH_COND_STABLE;
184} 186}
185 187
186/* 188/*
@@ -387,11 +389,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
387 spin_lock(&inode->i_lock); 389 spin_lock(&inode->i_lock);
388 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); 390 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
389 BUG_ON(error); 391 BUG_ON(error);
390 if (!nfsi->npages) { 392 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
391 igrab(inode); 393 nfsi->change_attr++;
392 if (nfs_have_delegation(inode, FMODE_WRITE))
393 nfsi->change_attr++;
394 }
395 set_bit(PG_MAPPED, &req->wb_flags); 394 set_bit(PG_MAPPED, &req->wb_flags);
396 SetPagePrivate(req->wb_page); 395 SetPagePrivate(req->wb_page);
397 set_page_private(req->wb_page, (unsigned long)req); 396 set_page_private(req->wb_page, (unsigned long)req);
@@ -421,11 +420,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
421 clear_bit(PG_MAPPED, &req->wb_flags); 420 clear_bit(PG_MAPPED, &req->wb_flags);
422 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 421 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
423 nfsi->npages--; 422 nfsi->npages--;
424 if (!nfsi->npages) { 423 spin_unlock(&inode->i_lock);
425 spin_unlock(&inode->i_lock);
426 iput(inode);
427 } else
428 spin_unlock(&inode->i_lock);
429 nfs_release_request(req); 424 nfs_release_request(req);
430} 425}
431 426
@@ -441,7 +436,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
441 * Add a request to the inode's commit list. 436 * Add a request to the inode's commit list.
442 */ 437 */
443static void 438static void
444nfs_mark_request_commit(struct nfs_page *req) 439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
445{ 440{
446 struct inode *inode = req->wb_context->path.dentry->d_inode; 441 struct inode *inode = req->wb_context->path.dentry->d_inode;
447 struct nfs_inode *nfsi = NFS_I(inode); 442 struct nfs_inode *nfsi = NFS_I(inode);
@@ -453,6 +448,7 @@ nfs_mark_request_commit(struct nfs_page *req)
453 NFS_PAGE_TAG_COMMIT); 448 NFS_PAGE_TAG_COMMIT);
454 nfsi->ncommit++; 449 nfsi->ncommit++;
455 spin_unlock(&inode->i_lock); 450 spin_unlock(&inode->i_lock);
451 pnfs_mark_request_commit(req, lseg);
456 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 452 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
457 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 453 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
458 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 454 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -474,14 +470,18 @@ nfs_clear_request_commit(struct nfs_page *req)
474static inline 470static inline
475int nfs_write_need_commit(struct nfs_write_data *data) 471int nfs_write_need_commit(struct nfs_write_data *data)
476{ 472{
477 return data->verf.committed != NFS_FILE_SYNC; 473 if (data->verf.committed == NFS_DATA_SYNC)
474 return data->lseg == NULL;
475 else
476 return data->verf.committed != NFS_FILE_SYNC;
478} 477}
479 478
480static inline 479static inline
481int nfs_reschedule_unstable_write(struct nfs_page *req) 480int nfs_reschedule_unstable_write(struct nfs_page *req,
481 struct nfs_write_data *data)
482{ 482{
483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { 483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
484 nfs_mark_request_commit(req); 484 nfs_mark_request_commit(req, data->lseg);
485 return 1; 485 return 1;
486 } 486 }
487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { 487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -492,7 +492,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
492} 492}
493#else 493#else
494static inline void 494static inline void
495nfs_mark_request_commit(struct nfs_page *req) 495nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
496{ 496{
497} 497}
498 498
@@ -509,7 +509,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
509} 509}
510 510
511static inline 511static inline
512int nfs_reschedule_unstable_write(struct nfs_page *req) 512int nfs_reschedule_unstable_write(struct nfs_page *req,
513 struct nfs_write_data *data)
513{ 514{
514 return 0; 515 return 0;
515} 516}
@@ -612,9 +613,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
612 } 613 }
613 614
614 if (nfs_clear_request_commit(req) && 615 if (nfs_clear_request_commit(req) &&
615 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, 616 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
616 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) 617 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
617 NFS_I(inode)->ncommit--; 618 NFS_I(inode)->ncommit--;
619 pnfs_clear_request_commit(req);
620 }
618 621
619 /* Okay, the request matches. Update the region */ 622 /* Okay, the request matches. Update the region */
620 if (offset < req->wb_offset) { 623 if (offset < req->wb_offset) {
@@ -762,11 +765,12 @@ int nfs_updatepage(struct file *file, struct page *page,
762 return status; 765 return status;
763} 766}
764 767
765static void nfs_writepage_release(struct nfs_page *req) 768static void nfs_writepage_release(struct nfs_page *req,
769 struct nfs_write_data *data)
766{ 770{
767 struct page *page = req->wb_page; 771 struct page *page = req->wb_page;
768 772
769 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) 773 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
770 nfs_inode_remove_request(req); 774 nfs_inode_remove_request(req);
771 nfs_clear_page_tag_locked(req); 775 nfs_clear_page_tag_locked(req);
772 nfs_end_page_writeback(page); 776 nfs_end_page_writeback(page);
@@ -863,7 +867,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
863 data->args.context = get_nfs_open_context(req->wb_context); 867 data->args.context = get_nfs_open_context(req->wb_context);
864 data->args.lock_context = req->wb_lock_context; 868 data->args.lock_context = req->wb_lock_context;
865 data->args.stable = NFS_UNSTABLE; 869 data->args.stable = NFS_UNSTABLE;
866 if (how & FLUSH_STABLE) { 870 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
867 data->args.stable = NFS_DATA_SYNC; 871 data->args.stable = NFS_DATA_SYNC;
868 if (!nfs_need_commit(NFS_I(inode))) 872 if (!nfs_need_commit(NFS_I(inode)))
869 data->args.stable = NFS_FILE_SYNC; 873 data->args.stable = NFS_FILE_SYNC;
@@ -912,6 +916,12 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
912 916
913 nfs_list_remove_request(req); 917 nfs_list_remove_request(req);
914 918
919 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
920 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
921 desc->pg_count > wsize))
922 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
923
924
915 nbytes = desc->pg_count; 925 nbytes = desc->pg_count;
916 do { 926 do {
917 size_t len = min(nbytes, wsize); 927 size_t len = min(nbytes, wsize);
@@ -1002,6 +1012,10 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1002 if ((!lseg) && list_is_singular(&data->pages)) 1012 if ((!lseg) && list_is_singular(&data->pages))
1003 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 1013 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
1004 1014
1015 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1016 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1017 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1018
1005 /* Set up the argument struct */ 1019 /* Set up the argument struct */
1006 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); 1020 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
1007out: 1021out:
@@ -1074,7 +1088,7 @@ static void nfs_writeback_release_partial(void *calldata)
1074 1088
1075out: 1089out:
1076 if (atomic_dec_and_test(&req->wb_complete)) 1090 if (atomic_dec_and_test(&req->wb_complete))
1077 nfs_writepage_release(req); 1091 nfs_writepage_release(req, data);
1078 nfs_writedata_release(calldata); 1092 nfs_writedata_release(calldata);
1079} 1093}
1080 1094
@@ -1141,7 +1155,7 @@ static void nfs_writeback_release_full(void *calldata)
1141 1155
1142 if (nfs_write_need_commit(data)) { 1156 if (nfs_write_need_commit(data)) {
1143 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1157 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1144 nfs_mark_request_commit(req); 1158 nfs_mark_request_commit(req, data->lseg);
1145 dprintk(" marked for commit\n"); 1159 dprintk(" marked for commit\n");
1146 goto next; 1160 goto next;
1147 } 1161 }
@@ -1251,57 +1265,82 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1251#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1265#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1252static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1266static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1253{ 1267{
1268 int ret;
1269
1254 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1270 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1255 return 1; 1271 return 1;
1256 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, 1272 if (!may_wait)
1257 NFS_INO_COMMIT, nfs_wait_bit_killable, 1273 return 0;
1258 TASK_KILLABLE)) 1274 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1259 return 1; 1275 NFS_INO_COMMIT,
1260 return 0; 1276 nfs_wait_bit_killable,
1277 TASK_KILLABLE);
1278 return (ret < 0) ? ret : 1;
1261} 1279}
1262 1280
1263static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1281void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1264{ 1282{
1265 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1283 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1266 smp_mb__after_clear_bit(); 1284 smp_mb__after_clear_bit();
1267 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1285 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1268} 1286}
1287EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1269 1288
1270 1289void nfs_commitdata_release(void *data)
1271static void nfs_commitdata_release(void *data)
1272{ 1290{
1273 struct nfs_write_data *wdata = data; 1291 struct nfs_write_data *wdata = data;
1274 1292
1293 put_lseg(wdata->lseg);
1275 put_nfs_open_context(wdata->args.context); 1294 put_nfs_open_context(wdata->args.context);
1276 nfs_commit_free(wdata); 1295 nfs_commit_free(wdata);
1277} 1296}
1297EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1278 1298
1279/* 1299int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1280 * Set up the argument/result storage required for the RPC call. 1300 const struct rpc_call_ops *call_ops,
1281 */ 1301 int how)
1282static int nfs_commit_rpcsetup(struct list_head *head,
1283 struct nfs_write_data *data,
1284 int how)
1285{ 1302{
1286 struct nfs_page *first = nfs_list_entry(head->next);
1287 struct inode *inode = first->wb_context->path.dentry->d_inode;
1288 int priority = flush_task_priority(how);
1289 struct rpc_task *task; 1303 struct rpc_task *task;
1304 int priority = flush_task_priority(how);
1290 struct rpc_message msg = { 1305 struct rpc_message msg = {
1291 .rpc_argp = &data->args, 1306 .rpc_argp = &data->args,
1292 .rpc_resp = &data->res, 1307 .rpc_resp = &data->res,
1293 .rpc_cred = first->wb_context->cred, 1308 .rpc_cred = data->cred,
1294 }; 1309 };
1295 struct rpc_task_setup task_setup_data = { 1310 struct rpc_task_setup task_setup_data = {
1296 .task = &data->task, 1311 .task = &data->task,
1297 .rpc_client = NFS_CLIENT(inode), 1312 .rpc_client = clnt,
1298 .rpc_message = &msg, 1313 .rpc_message = &msg,
1299 .callback_ops = &nfs_commit_ops, 1314 .callback_ops = call_ops,
1300 .callback_data = data, 1315 .callback_data = data,
1301 .workqueue = nfsiod_workqueue, 1316 .workqueue = nfsiod_workqueue,
1302 .flags = RPC_TASK_ASYNC, 1317 .flags = RPC_TASK_ASYNC,
1303 .priority = priority, 1318 .priority = priority,
1304 }; 1319 };
1320 /* Set up the initial task struct. */
1321 NFS_PROTO(data->inode)->commit_setup(data, &msg);
1322
1323 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1324
1325 task = rpc_run_task(&task_setup_data);
1326 if (IS_ERR(task))
1327 return PTR_ERR(task);
1328 if (how & FLUSH_SYNC)
1329 rpc_wait_for_completion_task(task);
1330 rpc_put_task(task);
1331 return 0;
1332}
1333EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1334
1335/*
1336 * Set up the argument/result storage required for the RPC call.
1337 */
1338void nfs_init_commit(struct nfs_write_data *data,
1339 struct list_head *head,
1340 struct pnfs_layout_segment *lseg)
1341{
1342 struct nfs_page *first = nfs_list_entry(head->next);
1343 struct inode *inode = first->wb_context->path.dentry->d_inode;
1305 1344
1306 /* Set up the RPC argument and reply structs 1345 /* Set up the RPC argument and reply structs
1307 * NB: take care not to mess about with data->commit et al. */ 1346 * NB: take care not to mess about with data->commit et al. */
@@ -1309,7 +1348,9 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1309 list_splice_init(head, &data->pages); 1348 list_splice_init(head, &data->pages);
1310 1349
1311 data->inode = inode; 1350 data->inode = inode;
1312 data->cred = msg.rpc_cred; 1351 data->cred = first->wb_context->cred;
1352 data->lseg = lseg; /* reference transferred */
1353 data->mds_ops = &nfs_commit_ops;
1313 1354
1314 data->args.fh = NFS_FH(data->inode); 1355 data->args.fh = NFS_FH(data->inode);
1315 /* Note: we always request a commit of the entire inode */ 1356 /* Note: we always request a commit of the entire inode */
@@ -1320,20 +1361,25 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1320 data->res.fattr = &data->fattr; 1361 data->res.fattr = &data->fattr;
1321 data->res.verf = &data->verf; 1362 data->res.verf = &data->verf;
1322 nfs_fattr_init(&data->fattr); 1363 nfs_fattr_init(&data->fattr);
1364}
1365EXPORT_SYMBOL_GPL(nfs_init_commit);
1323 1366
1324 /* Set up the initial task struct. */ 1367void nfs_retry_commit(struct list_head *page_list,
1325 NFS_PROTO(inode)->commit_setup(data, &msg); 1368 struct pnfs_layout_segment *lseg)
1326 1369{
1327 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1370 struct nfs_page *req;
1328 1371
1329 task = rpc_run_task(&task_setup_data); 1372 while (!list_empty(page_list)) {
1330 if (IS_ERR(task)) 1373 req = nfs_list_entry(page_list->next);
1331 return PTR_ERR(task); 1374 nfs_list_remove_request(req);
1332 if (how & FLUSH_SYNC) 1375 nfs_mark_request_commit(req, lseg);
1333 rpc_wait_for_completion_task(task); 1376 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1334 rpc_put_task(task); 1377 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1335 return 0; 1378 BDI_RECLAIMABLE);
1379 nfs_clear_page_tag_locked(req);
1380 }
1336} 1381}
1382EXPORT_SYMBOL_GPL(nfs_retry_commit);
1337 1383
1338/* 1384/*
1339 * Commit dirty pages 1385 * Commit dirty pages
@@ -1342,7 +1388,6 @@ static int
1342nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1388nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1343{ 1389{
1344 struct nfs_write_data *data; 1390 struct nfs_write_data *data;
1345 struct nfs_page *req;
1346 1391
1347 data = nfs_commitdata_alloc(); 1392 data = nfs_commitdata_alloc();
1348 1393
@@ -1350,17 +1395,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1350 goto out_bad; 1395 goto out_bad;
1351 1396
1352 /* Set up the argument struct */ 1397 /* Set up the argument struct */
1353 return nfs_commit_rpcsetup(head, data, how); 1398 nfs_init_commit(data, head, NULL);
1399 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
1354 out_bad: 1400 out_bad:
1355 while (!list_empty(head)) { 1401 nfs_retry_commit(head, NULL);
1356 req = nfs_list_entry(head->next);
1357 nfs_list_remove_request(req);
1358 nfs_mark_request_commit(req);
1359 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1360 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1361 BDI_RECLAIMABLE);
1362 nfs_clear_page_tag_locked(req);
1363 }
1364 nfs_commit_clear_lock(NFS_I(inode)); 1402 nfs_commit_clear_lock(NFS_I(inode));
1365 return -ENOMEM; 1403 return -ENOMEM;
1366} 1404}
@@ -1380,10 +1418,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1380 return; 1418 return;
1381} 1419}
1382 1420
1383static void nfs_commit_release(void *calldata) 1421void nfs_commit_release_pages(struct nfs_write_data *data)
1384{ 1422{
1385 struct nfs_write_data *data = calldata; 1423 struct nfs_page *req;
1386 struct nfs_page *req;
1387 int status = data->task.tk_status; 1424 int status = data->task.tk_status;
1388 1425
1389 while (!list_empty(&data->pages)) { 1426 while (!list_empty(&data->pages)) {
@@ -1417,6 +1454,14 @@ static void nfs_commit_release(void *calldata)
1417 next: 1454 next:
1418 nfs_clear_page_tag_locked(req); 1455 nfs_clear_page_tag_locked(req);
1419 } 1456 }
1457}
1458EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1459
1460static void nfs_commit_release(void *calldata)
1461{
1462 struct nfs_write_data *data = calldata;
1463
1464 nfs_commit_release_pages(data);
1420 nfs_commit_clear_lock(NFS_I(data->inode)); 1465 nfs_commit_clear_lock(NFS_I(data->inode));
1421 nfs_commitdata_release(calldata); 1466 nfs_commitdata_release(calldata);
1422} 1467}
@@ -1433,23 +1478,30 @@ int nfs_commit_inode(struct inode *inode, int how)
1433{ 1478{
1434 LIST_HEAD(head); 1479 LIST_HEAD(head);
1435 int may_wait = how & FLUSH_SYNC; 1480 int may_wait = how & FLUSH_SYNC;
1436 int res = 0; 1481 int res;
1437 1482
1438 if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) 1483 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1484 if (res <= 0)
1439 goto out_mark_dirty; 1485 goto out_mark_dirty;
1440 spin_lock(&inode->i_lock); 1486 spin_lock(&inode->i_lock);
1441 res = nfs_scan_commit(inode, &head, 0, 0); 1487 res = nfs_scan_commit(inode, &head, 0, 0);
1442 spin_unlock(&inode->i_lock); 1488 spin_unlock(&inode->i_lock);
1443 if (res) { 1489 if (res) {
1444 int error = nfs_commit_list(inode, &head, how); 1490 int error;
1491
1492 error = pnfs_commit_list(inode, &head, how);
1493 if (error == PNFS_NOT_ATTEMPTED)
1494 error = nfs_commit_list(inode, &head, how);
1445 if (error < 0) 1495 if (error < 0)
1446 return error; 1496 return error;
1447 if (may_wait) 1497 if (!may_wait)
1448 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1449 nfs_wait_bit_killable,
1450 TASK_KILLABLE);
1451 else
1452 goto out_mark_dirty; 1498 goto out_mark_dirty;
1499 error = wait_on_bit(&NFS_I(inode)->flags,
1500 NFS_INO_COMMIT,
1501 nfs_wait_bit_killable,
1502 TASK_KILLABLE);
1503 if (error < 0)
1504 return error;
1453 } else 1505 } else
1454 nfs_commit_clear_lock(NFS_I(inode)); 1506 nfs_commit_clear_lock(NFS_I(inode));
1455 return res; 1507 return res;
@@ -1503,7 +1555,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1503 1555
1504int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1556int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1505{ 1557{
1506 return nfs_commit_unstable_pages(inode, wbc); 1558 int ret;
1559
1560 ret = nfs_commit_unstable_pages(inode, wbc);
1561 if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
1562 int status;
1563 bool sync = true;
1564
1565 if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
1566 wbc->for_background)
1567 sync = false;
1568
1569 status = pnfs_layoutcommit_inode(inode, sync);
1570 if (status < 0)
1571 return status;
1572 }
1573 return ret;
1507} 1574}
1508 1575
1509/* 1576/*
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 84c27d69d421..ec0f277be7f5 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -117,7 +117,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
117 * invoked in contexts where a memory allocation failure is 117 * invoked in contexts where a memory allocation failure is
118 * fatal. Fortunately this fake ACL is small enough to 118 * fatal. Fortunately this fake ACL is small enough to
119 * construct on the stack. */ 119 * construct on the stack. */
120 memset(acl2, 0, sizeof(acl2));
121 posix_acl_init(acl2, 4); 120 posix_acl_init(acl2, 4);
122 121
123 /* Insert entries in canonical order: other orders seem 122 /* Insert entries in canonical order: other orders seem
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8b31e5f8795d..ad000aeb21a2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -299,7 +299,6 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
299 299
300#define EXPORT_HASHBITS 8 300#define EXPORT_HASHBITS 8
301#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) 301#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS)
302#define EXPORT_HASHMASK (EXPORT_HASHMAX -1)
303 302
304static struct cache_head *export_table[EXPORT_HASHMAX]; 303static struct cache_head *export_table[EXPORT_HASHMAX];
305 304
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 6d2c397d458b..55780a22fdbd 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -63,7 +63,6 @@ struct ent {
63 63
64#define ENT_HASHBITS 8 64#define ENT_HASHBITS 8
65#define ENT_HASHMAX (1 << ENT_HASHBITS) 65#define ENT_HASHMAX (1 << ENT_HASHBITS)
66#define ENT_HASHMASK (ENT_HASHMAX - 1)
67 66
68static void 67static void
69ent_init(struct cache_head *cnew, struct cache_head *citm) 68ent_init(struct cache_head *cnew, struct cache_head *citm)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index db52546143d1..5fcb1396a7e3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -984,8 +984,8 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
984 void *); 984 void *);
985enum nfsd4_op_flags { 985enum nfsd4_op_flags {
986 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ 986 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
987 ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */ 987 ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
988 ALLOWED_AS_FIRST_OP = 3 << 0, /* ops reqired first in compound */ 988 ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
989}; 989};
990 990
991struct nfsd4_operation { 991struct nfsd4_operation {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b566ec14e18..fbde6f79922e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -148,7 +148,7 @@ static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
148/* hash table for nfs4_file */ 148/* hash table for nfs4_file */
149#define FILE_HASH_BITS 8 149#define FILE_HASH_BITS 8
150#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) 150#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
151#define FILE_HASH_MASK (FILE_HASH_SIZE - 1) 151
152/* hash table for (open)nfs4_stateid */ 152/* hash table for (open)nfs4_stateid */
153#define STATEID_HASH_BITS 10 153#define STATEID_HASH_BITS 10
154#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) 154#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
@@ -316,64 +316,6 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
316static struct list_head client_lru; 316static struct list_head client_lru;
317static struct list_head close_lru; 317static struct list_head close_lru;
318 318
319static void unhash_generic_stateid(struct nfs4_stateid *stp)
320{
321 list_del(&stp->st_hash);
322 list_del(&stp->st_perfile);
323 list_del(&stp->st_perstateowner);
324}
325
326static void free_generic_stateid(struct nfs4_stateid *stp)
327{
328 put_nfs4_file(stp->st_file);
329 kmem_cache_free(stateid_slab, stp);
330}
331
332static void release_lock_stateid(struct nfs4_stateid *stp)
333{
334 struct file *file;
335
336 unhash_generic_stateid(stp);
337 file = find_any_file(stp->st_file);
338 if (file)
339 locks_remove_posix(file, (fl_owner_t)stp->st_stateowner);
340 free_generic_stateid(stp);
341}
342
343static void unhash_lockowner(struct nfs4_stateowner *sop)
344{
345 struct nfs4_stateid *stp;
346
347 list_del(&sop->so_idhash);
348 list_del(&sop->so_strhash);
349 list_del(&sop->so_perstateid);
350 while (!list_empty(&sop->so_stateids)) {
351 stp = list_first_entry(&sop->so_stateids,
352 struct nfs4_stateid, st_perstateowner);
353 release_lock_stateid(stp);
354 }
355}
356
357static void release_lockowner(struct nfs4_stateowner *sop)
358{
359 unhash_lockowner(sop);
360 nfs4_put_stateowner(sop);
361}
362
363static void
364release_stateid_lockowners(struct nfs4_stateid *open_stp)
365{
366 struct nfs4_stateowner *lock_sop;
367
368 while (!list_empty(&open_stp->st_lockowners)) {
369 lock_sop = list_entry(open_stp->st_lockowners.next,
370 struct nfs4_stateowner, so_perstateid);
371 /* list_del(&open_stp->st_lockowners); */
372 BUG_ON(lock_sop->so_is_open_owner);
373 release_lockowner(lock_sop);
374 }
375}
376
377/* 319/*
378 * We store the NONE, READ, WRITE, and BOTH bits separately in the 320 * We store the NONE, READ, WRITE, and BOTH bits separately in the
379 * st_{access,deny}_bmap field of the stateid, in order to track not 321 * st_{access,deny}_bmap field of the stateid, in order to track not
@@ -446,13 +388,71 @@ static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp)
446 return nfs4_access_to_omode(access); 388 return nfs4_access_to_omode(access);
447} 389}
448 390
449static void release_open_stateid(struct nfs4_stateid *stp) 391static void unhash_generic_stateid(struct nfs4_stateid *stp)
392{
393 list_del(&stp->st_hash);
394 list_del(&stp->st_perfile);
395 list_del(&stp->st_perstateowner);
396}
397
398static void free_generic_stateid(struct nfs4_stateid *stp)
450{ 399{
451 int oflag = nfs4_access_bmap_to_omode(stp); 400 int oflag = nfs4_access_bmap_to_omode(stp);
452 401
402 nfs4_file_put_access(stp->st_file, oflag);
403 put_nfs4_file(stp->st_file);
404 kmem_cache_free(stateid_slab, stp);
405}
406
407static void release_lock_stateid(struct nfs4_stateid *stp)
408{
409 struct file *file;
410
411 unhash_generic_stateid(stp);
412 file = find_any_file(stp->st_file);
413 if (file)
414 locks_remove_posix(file, (fl_owner_t)stp->st_stateowner);
415 free_generic_stateid(stp);
416}
417
418static void unhash_lockowner(struct nfs4_stateowner *sop)
419{
420 struct nfs4_stateid *stp;
421
422 list_del(&sop->so_idhash);
423 list_del(&sop->so_strhash);
424 list_del(&sop->so_perstateid);
425 while (!list_empty(&sop->so_stateids)) {
426 stp = list_first_entry(&sop->so_stateids,
427 struct nfs4_stateid, st_perstateowner);
428 release_lock_stateid(stp);
429 }
430}
431
432static void release_lockowner(struct nfs4_stateowner *sop)
433{
434 unhash_lockowner(sop);
435 nfs4_put_stateowner(sop);
436}
437
438static void
439release_stateid_lockowners(struct nfs4_stateid *open_stp)
440{
441 struct nfs4_stateowner *lock_sop;
442
443 while (!list_empty(&open_stp->st_lockowners)) {
444 lock_sop = list_entry(open_stp->st_lockowners.next,
445 struct nfs4_stateowner, so_perstateid);
446 /* list_del(&open_stp->st_lockowners); */
447 BUG_ON(lock_sop->so_is_open_owner);
448 release_lockowner(lock_sop);
449 }
450}
451
452static void release_open_stateid(struct nfs4_stateid *stp)
453{
453 unhash_generic_stateid(stp); 454 unhash_generic_stateid(stp);
454 release_stateid_lockowners(stp); 455 release_stateid_lockowners(stp);
455 nfs4_file_put_access(stp->st_file, oflag);
456 free_generic_stateid(stp); 456 free_generic_stateid(stp);
457} 457}
458 458
@@ -608,7 +608,8 @@ static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4
608 u32 maxrpc = nfsd_serv->sv_max_mesg; 608 u32 maxrpc = nfsd_serv->sv_max_mesg;
609 609
610 new->maxreqs = numslots; 610 new->maxreqs = numslots;
611 new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ; 611 new->maxresp_cached = min_t(u32, req->maxresp_cached,
612 slotsize + NFSD_MIN_HDR_SEQ_SZ);
612 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); 613 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
613 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); 614 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
614 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); 615 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
@@ -3735,6 +3736,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
3735 stp->st_stateid.si_stateownerid = sop->so_id; 3736 stp->st_stateid.si_stateownerid = sop->so_id;
3736 stp->st_stateid.si_fileid = fp->fi_id; 3737 stp->st_stateid.si_fileid = fp->fi_id;
3737 stp->st_stateid.si_generation = 0; 3738 stp->st_stateid.si_generation = 0;
3739 stp->st_access_bmap = 0;
3738 stp->st_deny_bmap = open_stp->st_deny_bmap; 3740 stp->st_deny_bmap = open_stp->st_deny_bmap;
3739 stp->st_openstp = open_stp; 3741 stp->st_openstp = open_stp;
3740 3742
@@ -3749,6 +3751,17 @@ check_lock_length(u64 offset, u64 length)
3749 LOFF_OVERFLOW(offset, length))); 3751 LOFF_OVERFLOW(offset, length)));
3750} 3752}
3751 3753
3754static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access)
3755{
3756 struct nfs4_file *fp = lock_stp->st_file;
3757 int oflag = nfs4_access_to_omode(access);
3758
3759 if (test_bit(access, &lock_stp->st_access_bmap))
3760 return;
3761 nfs4_file_get_access(fp, oflag);
3762 __set_bit(access, &lock_stp->st_access_bmap);
3763}
3764
3752/* 3765/*
3753 * LOCK operation 3766 * LOCK operation
3754 */ 3767 */
@@ -3765,7 +3778,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3765 struct file_lock conflock; 3778 struct file_lock conflock;
3766 __be32 status = 0; 3779 __be32 status = 0;
3767 unsigned int strhashval; 3780 unsigned int strhashval;
3768 unsigned int cmd;
3769 int err; 3781 int err;
3770 3782
3771 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", 3783 dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
@@ -3847,22 +3859,18 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3847 switch (lock->lk_type) { 3859 switch (lock->lk_type) {
3848 case NFS4_READ_LT: 3860 case NFS4_READ_LT:
3849 case NFS4_READW_LT: 3861 case NFS4_READW_LT:
3850 if (find_readable_file(lock_stp->st_file)) { 3862 filp = find_readable_file(lock_stp->st_file);
3851 nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ); 3863 if (filp)
3852 filp = find_readable_file(lock_stp->st_file); 3864 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
3853 }
3854 file_lock.fl_type = F_RDLCK; 3865 file_lock.fl_type = F_RDLCK;
3855 cmd = F_SETLK; 3866 break;
3856 break;
3857 case NFS4_WRITE_LT: 3867 case NFS4_WRITE_LT:
3858 case NFS4_WRITEW_LT: 3868 case NFS4_WRITEW_LT:
3859 if (find_writeable_file(lock_stp->st_file)) { 3869 filp = find_writeable_file(lock_stp->st_file);
3860 nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE); 3870 if (filp)
3861 filp = find_writeable_file(lock_stp->st_file); 3871 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
3862 }
3863 file_lock.fl_type = F_WRLCK; 3872 file_lock.fl_type = F_WRLCK;
3864 cmd = F_SETLK; 3873 break;
3865 break;
3866 default: 3874 default:
3867 status = nfserr_inval; 3875 status = nfserr_inval;
3868 goto out; 3876 goto out;
@@ -3886,7 +3894,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3886 * Note: locks.c uses the BKL to protect the inode's lock list. 3894 * Note: locks.c uses the BKL to protect the inode's lock list.
3887 */ 3895 */
3888 3896
3889 err = vfs_lock_file(filp, cmd, &file_lock, &conflock); 3897 err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock);
3890 switch (-err) { 3898 switch (-err) {
3891 case 0: /* success! */ 3899 case 0: /* success! */
3892 update_stateid(&lock_stp->st_stateid); 3900 update_stateid(&lock_stp->st_stateid);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 615f0a9f0600..c6766af00d98 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1142 1142
1143 u32 dummy; 1143 u32 dummy;
1144 char *machine_name; 1144 char *machine_name;
1145 int i, j; 1145 int i;
1146 int nr_secflavs; 1146 int nr_secflavs;
1147 1147
1148 READ_BUF(16); 1148 READ_BUF(16);
@@ -1215,8 +1215,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1215 READ_BUF(4); 1215 READ_BUF(4);
1216 READ32(dummy); 1216 READ32(dummy);
1217 READ_BUF(dummy * 4); 1217 READ_BUF(dummy * 4);
1218 for (j = 0; j < dummy; ++j)
1219 READ32(dummy);
1220 break; 1218 break;
1221 case RPC_AUTH_GSS: 1219 case RPC_AUTH_GSS:
1222 dprintk("RPC_AUTH_GSS callback secflavor " 1220 dprintk("RPC_AUTH_GSS callback secflavor "
@@ -1232,7 +1230,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1232 READ_BUF(4); 1230 READ_BUF(4);
1233 READ32(dummy); 1231 READ32(dummy);
1234 READ_BUF(dummy); 1232 READ_BUF(dummy);
1235 p += XDR_QUADLEN(dummy);
1236 break; 1233 break;
1237 default: 1234 default:
1238 dprintk("Illegal callback secflavor\n"); 1235 dprintk("Illegal callback secflavor\n");
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 33b3e2b06779..1f5eae40f34e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -12,13 +12,14 @@
12#include <linux/nfsd/syscall.h> 12#include <linux/nfsd/syscall.h>
13#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
14#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/gss_api.h>
15 16
16#include "idmap.h" 17#include "idmap.h"
17#include "nfsd.h" 18#include "nfsd.h"
18#include "cache.h" 19#include "cache.h"
19 20
20/* 21/*
21 * We have a single directory with 9 nodes in it. 22 * We have a single directory with several nodes in it.
22 */ 23 */
23enum { 24enum {
24 NFSD_Root = 1, 25 NFSD_Root = 1,
@@ -42,6 +43,7 @@ enum {
42 NFSD_Versions, 43 NFSD_Versions,
43 NFSD_Ports, 44 NFSD_Ports,
44 NFSD_MaxBlkSize, 45 NFSD_MaxBlkSize,
46 NFSD_SupportedEnctypes,
45 /* 47 /*
46 * The below MUST come last. Otherwise we leave a hole in nfsd_files[] 48 * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
47 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops 49 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -187,6 +189,34 @@ static struct file_operations export_features_operations = {
187 .release = single_release, 189 .release = single_release,
188}; 190};
189 191
192#ifdef CONFIG_SUNRPC_GSS
193static int supported_enctypes_show(struct seq_file *m, void *v)
194{
195 struct gss_api_mech *k5mech;
196
197 k5mech = gss_mech_get_by_name("krb5");
198 if (k5mech == NULL)
199 goto out;
200 if (k5mech->gm_upcall_enctypes != NULL)
201 seq_printf(m, k5mech->gm_upcall_enctypes);
202 gss_mech_put(k5mech);
203out:
204 return 0;
205}
206
207static int supported_enctypes_open(struct inode *inode, struct file *file)
208{
209 return single_open(file, supported_enctypes_show, NULL);
210}
211
212static struct file_operations supported_enctypes_ops = {
213 .open = supported_enctypes_open,
214 .read = seq_read,
215 .llseek = seq_lseek,
216 .release = single_release,
217};
218#endif /* CONFIG_SUNRPC_GSS */
219
190extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 220extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
191extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 221extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
192 222
@@ -1397,6 +1427,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1397 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 1427 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
1398 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 1428 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
1399 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1429 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1430#ifdef CONFIG_SUNRPC_GSS
1431 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
1432#endif /* CONFIG_SUNRPC_GSS */
1400#ifdef CONFIG_NFSD_V4 1433#ifdef CONFIG_NFSD_V4
1401 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1434 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1402 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1435 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2d31224b07bf..6bd2f3c21f2b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -367,16 +367,12 @@ struct nfs4_file {
367 struct list_head fi_delegations; 367 struct list_head fi_delegations;
368 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ 368 /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
369 struct file * fi_fds[3]; 369 struct file * fi_fds[3];
370 /* One each for O_RDONLY, O_WRONLY: */
371 atomic_t fi_access[2];
372 /* 370 /*
373 * Each open stateid contributes 1 to either fi_readers or 371 * Each open or lock stateid contributes 1 to either
374 * fi_writers, or both, depending on the open mode. A 372 * fi_access[O_RDONLY], fi_access[O_WRONLY], or both, depending
375 * delegation also takes an fi_readers reference. Lock 373 * on open or lock mode:
376 * stateid's take none.
377 */ 374 */
378 atomic_t fi_readers; 375 atomic_t fi_access[2];
379 atomic_t fi_writers;
380 struct file *fi_deleg_file; 376 struct file *fi_deleg_file;
381 struct file_lock *fi_lease; 377 struct file_lock *fi_lease;
382 atomic_t fi_delegees; 378 atomic_t fi_delegees;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ff93025ae2f7..2e1cebde90df 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1749,8 +1749,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1749 if (host_err) 1749 if (host_err)
1750 goto out_drop_write; 1750 goto out_drop_write;
1751 } 1751 }
1752 if (host_err)
1753 goto out_drop_write;
1754 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1752 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1755 if (!host_err) { 1753 if (!host_err) {
1756 host_err = commit_metadata(tfhp); 1754 host_err = commit_metadata(tfhp);
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index d7fd696e595c..0a0a66d98cce 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -521,8 +521,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
521 group_offset, bitmap)) 521 group_offset, bitmap))
522 printk(KERN_WARNING "%s: entry number %llu already freed\n", 522 printk(KERN_WARNING "%s: entry number %llu already freed\n",
523 __func__, (unsigned long long)req->pr_entry_nr); 523 __func__, (unsigned long long)req->pr_entry_nr);
524 524 else
525 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 525 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
526 526
527 kunmap(req->pr_bitmap_bh->b_page); 527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page); 528 kunmap(req->pr_desc_bh->b_page);
@@ -558,8 +558,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
558 group_offset, bitmap)) 558 group_offset, bitmap))
559 printk(KERN_WARNING "%s: entry number %llu already freed\n", 559 printk(KERN_WARNING "%s: entry number %llu already freed\n",
560 __func__, (unsigned long long)req->pr_entry_nr); 560 __func__, (unsigned long long)req->pr_entry_nr);
561 561 else
562 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 562 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
563 563
564 kunmap(req->pr_bitmap_bh->b_page); 564 kunmap(req->pr_bitmap_bh->b_page);
565 kunmap(req->pr_desc_bh->b_page); 565 kunmap(req->pr_desc_bh->b_page);
@@ -665,7 +665,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
665 for (j = i, n = 0; 665 for (j = i, n = 0;
666 (j < nitems) && nilfs_palloc_group_is_in(inode, group, 666 (j < nitems) && nilfs_palloc_group_is_in(inode, group,
667 entry_nrs[j]); 667 entry_nrs[j]);
668 j++, n++) { 668 j++) {
669 nilfs_palloc_group(inode, entry_nrs[j], &group_offset); 669 nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
670 if (!nilfs_clear_bit_atomic( 670 if (!nilfs_clear_bit_atomic(
671 nilfs_mdt_bgl_lock(inode, group), 671 nilfs_mdt_bgl_lock(inode, group),
@@ -674,6 +674,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
674 "%s: entry number %llu already freed\n", 674 "%s: entry number %llu already freed\n",
675 __func__, 675 __func__,
676 (unsigned long long)entry_nrs[j]); 676 (unsigned long long)entry_nrs[j]);
677 } else {
678 n++;
677 } 679 }
678 } 680 }
679 nilfs_palloc_group_desc_add_entries(inode, group, desc, n); 681 nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 9af34a7e6e13..f5fde36b9e28 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -74,7 +74,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
74 74
75#define nilfs_set_bit_atomic ext2_set_bit_atomic 75#define nilfs_set_bit_atomic ext2_set_bit_atomic
76#define nilfs_clear_bit_atomic ext2_clear_bit_atomic 76#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
77#define nilfs_find_next_zero_bit ext2_find_next_zero_bit 77#define nilfs_find_next_zero_bit find_next_zero_bit_le
78 78
79/* 79/*
80 * persistent object allocator cache 80 * persistent object allocator cache
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 3ee67c67cc52..4723f04e9b12 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -25,7 +25,6 @@
25#include <linux/errno.h> 25#include <linux/errno.h>
26#include "nilfs.h" 26#include "nilfs.h"
27#include "bmap.h" 27#include "bmap.h"
28#include "sb.h"
29#include "btree.h" 28#include "btree.h"
30#include "direct.h" 29#include "direct.h"
31#include "btnode.h" 30#include "btnode.h"
@@ -425,17 +424,6 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
425/* 424/*
426 * Internal use only 425 * Internal use only
427 */ 426 */
428
429void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
430{
431 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
432}
433
434void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
435{
436 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
437}
438
439__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, 427__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
440 const struct buffer_head *bh) 428 const struct buffer_head *bh)
441{ 429{
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index bde1c0aa2e15..40d9f453d31c 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -240,9 +240,6 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
240__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); 240__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
241__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); 241__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
242 242
243void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
244void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
245
246 243
247/* Assume that bmap semaphore is locked. */ 244/* Assume that bmap semaphore is locked. */
248static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) 245static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 85f7baa15f5d..609cd223eea8 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,15 +34,10 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37
38static const struct address_space_operations def_btnode_aops = {
39 .sync_page = block_sync_page,
40};
41
42void nilfs_btnode_cache_init(struct address_space *btnc, 37void nilfs_btnode_cache_init(struct address_space *btnc,
43 struct backing_dev_info *bdi) 38 struct backing_dev_info *bdi)
44{ 39{
45 nilfs_mapping_init(btnc, bdi, &def_btnode_aops); 40 nilfs_mapping_init(btnc, bdi);
46} 41}
47 42
48void nilfs_btnode_cache_clear(struct address_space *btnc) 43void nilfs_btnode_cache_clear(struct address_space *btnc)
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 300c2bc00c3f..d451ae0e0bf3 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1174,7 +1174,7 @@ static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1174 if (ret < 0) 1174 if (ret < 0)
1175 goto out; 1175 goto out;
1176 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1176 nilfs_btree_commit_insert(btree, path, level, key, ptr);
1177 nilfs_bmap_add_blocks(btree, stats.bs_nblocks); 1177 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1178 1178
1179 out: 1179 out:
1180 nilfs_btree_free_path(path); 1180 nilfs_btree_free_path(path);
@@ -1511,7 +1511,7 @@ static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1511 if (ret < 0) 1511 if (ret < 0)
1512 goto out; 1512 goto out;
1513 nilfs_btree_commit_delete(btree, path, level, dat); 1513 nilfs_btree_commit_delete(btree, path, level, dat);
1514 nilfs_bmap_sub_blocks(btree, stats.bs_nblocks); 1514 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1515 1515
1516out: 1516out:
1517 nilfs_btree_free_path(path); 1517 nilfs_btree_free_path(path);
@@ -1776,7 +1776,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1776 return ret; 1776 return ret;
1777 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1777 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1778 di, ni, bh); 1778 di, ni, bh);
1779 nilfs_bmap_add_blocks(btree, stats.bs_nblocks); 1779 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1780 return 0; 1780 return 0;
1781} 1781}
1782 1782
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 9d45773b79e6..3a1923943b14 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -440,7 +440,6 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
440 nilfs_commit_chunk(page, mapping, from, to); 440 nilfs_commit_chunk(page, mapping, from, to);
441 nilfs_put_page(page); 441 nilfs_put_page(page);
442 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 442 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
443/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
444} 443}
445 444
446/* 445/*
@@ -531,7 +530,6 @@ got_it:
531 nilfs_set_de_type(de, inode); 530 nilfs_set_de_type(de, inode);
532 nilfs_commit_chunk(page, page->mapping, from, to); 531 nilfs_commit_chunk(page, page->mapping, from, to);
533 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 532 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
534/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
535 nilfs_mark_inode_dirty(dir); 533 nilfs_mark_inode_dirty(dir);
536 /* OFFSET_CACHE */ 534 /* OFFSET_CACHE */
537out_put: 535out_put:
@@ -579,7 +577,6 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
579 dir->inode = 0; 577 dir->inode = 0;
580 nilfs_commit_chunk(page, mapping, from, to); 578 nilfs_commit_chunk(page, mapping, from, to);
581 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 579 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
582/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
583out: 580out:
584 nilfs_put_page(page); 581 nilfs_put_page(page);
585 return err; 582 return err;
@@ -684,7 +681,7 @@ const struct file_operations nilfs_dir_operations = {
684 .readdir = nilfs_readdir, 681 .readdir = nilfs_readdir,
685 .unlocked_ioctl = nilfs_ioctl, 682 .unlocked_ioctl = nilfs_ioctl,
686#ifdef CONFIG_COMPAT 683#ifdef CONFIG_COMPAT
687 .compat_ioctl = nilfs_ioctl, 684 .compat_ioctl = nilfs_compat_ioctl,
688#endif /* CONFIG_COMPAT */ 685#endif /* CONFIG_COMPAT */
689 .fsync = nilfs_sync_file, 686 .fsync = nilfs_sync_file,
690 687
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 324d80c57518..82f4865e86dd 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -146,7 +146,7 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
146 if (NILFS_BMAP_USE_VBN(bmap)) 146 if (NILFS_BMAP_USE_VBN(bmap))
147 nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); 147 nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr);
148 148
149 nilfs_bmap_add_blocks(bmap, 1); 149 nilfs_inode_add_blocks(bmap->b_inode, 1);
150 } 150 }
151 return ret; 151 return ret;
152} 152}
@@ -168,7 +168,7 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
168 if (!ret) { 168 if (!ret) {
169 nilfs_bmap_commit_end_ptr(bmap, &req, dat); 169 nilfs_bmap_commit_end_ptr(bmap, &req, dat);
170 nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); 170 nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR);
171 nilfs_bmap_sub_blocks(bmap, 1); 171 nilfs_inode_sub_blocks(bmap->b_inode, 1);
172 } 172 }
173 return ret; 173 return ret;
174} 174}
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 2f560c9fb808..93589fccdd97 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -59,7 +59,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
59 struct nilfs_transaction_info ti; 59 struct nilfs_transaction_info ti;
60 int ret; 60 int ret;
61 61
62 if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs))) 62 if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
63 return VM_FAULT_SIGBUS; /* -ENOSPC */ 63 return VM_FAULT_SIGBUS; /* -ENOSPC */
64 64
65 lock_page(page); 65 lock_page(page);
@@ -142,7 +142,7 @@ const struct file_operations nilfs_file_operations = {
142 .aio_write = generic_file_aio_write, 142 .aio_write = generic_file_aio_write,
143 .unlocked_ioctl = nilfs_ioctl, 143 .unlocked_ioctl = nilfs_ioctl,
144#ifdef CONFIG_COMPAT 144#ifdef CONFIG_COMPAT
145 .compat_ioctl = nilfs_ioctl, 145 .compat_ioctl = nilfs_compat_ioctl,
146#endif /* CONFIG_COMPAT */ 146#endif /* CONFIG_COMPAT */
147 .mmap = nilfs_file_mmap, 147 .mmap = nilfs_file_mmap,
148 .open = generic_file_open, 148 .open = generic_file_open,
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index caf9a6a3fb54..1c2a3e23f8b2 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -49,7 +49,6 @@
49#include "ifile.h" 49#include "ifile.h"
50 50
51static const struct address_space_operations def_gcinode_aops = { 51static const struct address_space_operations def_gcinode_aops = {
52 .sync_page = block_sync_page,
53}; 52};
54 53
55/* 54/*
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2fd440d8d6b8..c0aa27490c02 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -41,6 +41,24 @@ struct nilfs_iget_args {
41 int for_gc; 41 int for_gc;
42}; 42};
43 43
44void nilfs_inode_add_blocks(struct inode *inode, int n)
45{
46 struct nilfs_root *root = NILFS_I(inode)->i_root;
47
48 inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
49 if (root)
50 atomic_add(n, &root->blocks_count);
51}
52
53void nilfs_inode_sub_blocks(struct inode *inode, int n)
54{
55 struct nilfs_root *root = NILFS_I(inode)->i_root;
56
57 inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
58 if (root)
59 atomic_sub(n, &root->blocks_count);
60}
61
44/** 62/**
45 * nilfs_get_block() - get a file block on the filesystem (callback function) 63 * nilfs_get_block() - get a file block on the filesystem (callback function)
46 * @inode - inode struct of the target file 64 * @inode - inode struct of the target file
@@ -262,7 +280,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
262const struct address_space_operations nilfs_aops = { 280const struct address_space_operations nilfs_aops = {
263 .writepage = nilfs_writepage, 281 .writepage = nilfs_writepage,
264 .readpage = nilfs_readpage, 282 .readpage = nilfs_readpage,
265 .sync_page = block_sync_page,
266 .writepages = nilfs_writepages, 283 .writepages = nilfs_writepages,
267 .set_page_dirty = nilfs_set_page_dirty, 284 .set_page_dirty = nilfs_set_page_dirty,
268 .readpages = nilfs_readpages, 285 .readpages = nilfs_readpages,
@@ -277,7 +294,7 @@ const struct address_space_operations nilfs_aops = {
277struct inode *nilfs_new_inode(struct inode *dir, int mode) 294struct inode *nilfs_new_inode(struct inode *dir, int mode)
278{ 295{
279 struct super_block *sb = dir->i_sb; 296 struct super_block *sb = dir->i_sb;
280 struct nilfs_sb_info *sbi = NILFS_SB(sb); 297 struct the_nilfs *nilfs = sb->s_fs_info;
281 struct inode *inode; 298 struct inode *inode;
282 struct nilfs_inode_info *ii; 299 struct nilfs_inode_info *ii;
283 struct nilfs_root *root; 300 struct nilfs_root *root;
@@ -315,19 +332,16 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
315 /* No lock is needed; iget() ensures it. */ 332 /* No lock is needed; iget() ensures it. */
316 } 333 }
317 334
318 ii->i_flags = NILFS_I(dir)->i_flags; 335 ii->i_flags = nilfs_mask_flags(
319 if (S_ISLNK(mode)) 336 mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);
320 ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL);
321 if (!S_ISDIR(mode))
322 ii->i_flags &= ~NILFS_DIRSYNC_FL;
323 337
324 /* ii->i_file_acl = 0; */ 338 /* ii->i_file_acl = 0; */
325 /* ii->i_dir_acl = 0; */ 339 /* ii->i_dir_acl = 0; */
326 ii->i_dir_start_lookup = 0; 340 ii->i_dir_start_lookup = 0;
327 nilfs_set_inode_flags(inode); 341 nilfs_set_inode_flags(inode);
328 spin_lock(&sbi->s_next_gen_lock); 342 spin_lock(&nilfs->ns_next_gen_lock);
329 inode->i_generation = sbi->s_next_generation++; 343 inode->i_generation = nilfs->ns_next_generation++;
330 spin_unlock(&sbi->s_next_gen_lock); 344 spin_unlock(&nilfs->ns_next_gen_lock);
331 insert_inode_hash(inode); 345 insert_inode_hash(inode);
332 346
333 err = nilfs_init_acl(inode, dir); 347 err = nilfs_init_acl(inode, dir);
@@ -359,17 +373,15 @@ void nilfs_set_inode_flags(struct inode *inode)
359 373
360 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | 374 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
361 S_DIRSYNC); 375 S_DIRSYNC);
362 if (flags & NILFS_SYNC_FL) 376 if (flags & FS_SYNC_FL)
363 inode->i_flags |= S_SYNC; 377 inode->i_flags |= S_SYNC;
364 if (flags & NILFS_APPEND_FL) 378 if (flags & FS_APPEND_FL)
365 inode->i_flags |= S_APPEND; 379 inode->i_flags |= S_APPEND;
366 if (flags & NILFS_IMMUTABLE_FL) 380 if (flags & FS_IMMUTABLE_FL)
367 inode->i_flags |= S_IMMUTABLE; 381 inode->i_flags |= S_IMMUTABLE;
368#ifndef NILFS_ATIME_DISABLE 382 if (flags & FS_NOATIME_FL)
369 if (flags & NILFS_NOATIME_FL)
370#endif
371 inode->i_flags |= S_NOATIME; 383 inode->i_flags |= S_NOATIME;
372 if (flags & NILFS_DIRSYNC_FL) 384 if (flags & FS_DIRSYNC_FL)
373 inode->i_flags |= S_DIRSYNC; 385 inode->i_flags |= S_DIRSYNC;
374 mapping_set_gfp_mask(inode->i_mapping, 386 mapping_set_gfp_mask(inode->i_mapping,
375 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 387 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
@@ -420,7 +432,7 @@ static int __nilfs_read_inode(struct super_block *sb,
420 struct nilfs_root *root, unsigned long ino, 432 struct nilfs_root *root, unsigned long ino,
421 struct inode *inode) 433 struct inode *inode)
422{ 434{
423 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; 435 struct the_nilfs *nilfs = sb->s_fs_info;
424 struct buffer_head *bh; 436 struct buffer_head *bh;
425 struct nilfs_inode *raw_inode; 437 struct nilfs_inode *raw_inode;
426 int err; 438 int err;
@@ -707,6 +719,7 @@ void nilfs_evict_inode(struct inode *inode)
707 struct nilfs_transaction_info ti; 719 struct nilfs_transaction_info ti;
708 struct super_block *sb = inode->i_sb; 720 struct super_block *sb = inode->i_sb;
709 struct nilfs_inode_info *ii = NILFS_I(inode); 721 struct nilfs_inode_info *ii = NILFS_I(inode);
722 int ret;
710 723
711 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { 724 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
712 if (inode->i_data.nrpages) 725 if (inode->i_data.nrpages)
@@ -725,8 +738,9 @@ void nilfs_evict_inode(struct inode *inode)
725 nilfs_mark_inode_dirty(inode); 738 nilfs_mark_inode_dirty(inode);
726 end_writeback(inode); 739 end_writeback(inode);
727 740
728 nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); 741 ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
729 atomic_dec(&ii->i_root->inodes_count); 742 if (!ret)
743 atomic_dec(&ii->i_root->inodes_count);
730 744
731 nilfs_clear_inode(inode); 745 nilfs_clear_inode(inode);
732 746
@@ -792,18 +806,18 @@ int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
792 806
793int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) 807int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
794{ 808{
795 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); 809 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
796 struct nilfs_inode_info *ii = NILFS_I(inode); 810 struct nilfs_inode_info *ii = NILFS_I(inode);
797 int err; 811 int err;
798 812
799 spin_lock(&sbi->s_inode_lock); 813 spin_lock(&nilfs->ns_inode_lock);
800 if (ii->i_bh == NULL) { 814 if (ii->i_bh == NULL) {
801 spin_unlock(&sbi->s_inode_lock); 815 spin_unlock(&nilfs->ns_inode_lock);
802 err = nilfs_ifile_get_inode_block(ii->i_root->ifile, 816 err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
803 inode->i_ino, pbh); 817 inode->i_ino, pbh);
804 if (unlikely(err)) 818 if (unlikely(err))
805 return err; 819 return err;
806 spin_lock(&sbi->s_inode_lock); 820 spin_lock(&nilfs->ns_inode_lock);
807 if (ii->i_bh == NULL) 821 if (ii->i_bh == NULL)
808 ii->i_bh = *pbh; 822 ii->i_bh = *pbh;
809 else { 823 else {
@@ -814,36 +828,36 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
814 *pbh = ii->i_bh; 828 *pbh = ii->i_bh;
815 829
816 get_bh(*pbh); 830 get_bh(*pbh);
817 spin_unlock(&sbi->s_inode_lock); 831 spin_unlock(&nilfs->ns_inode_lock);
818 return 0; 832 return 0;
819} 833}
820 834
821int nilfs_inode_dirty(struct inode *inode) 835int nilfs_inode_dirty(struct inode *inode)
822{ 836{
823 struct nilfs_inode_info *ii = NILFS_I(inode); 837 struct nilfs_inode_info *ii = NILFS_I(inode);
824 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); 838 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
825 int ret = 0; 839 int ret = 0;
826 840
827 if (!list_empty(&ii->i_dirty)) { 841 if (!list_empty(&ii->i_dirty)) {
828 spin_lock(&sbi->s_inode_lock); 842 spin_lock(&nilfs->ns_inode_lock);
829 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || 843 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
830 test_bit(NILFS_I_BUSY, &ii->i_state); 844 test_bit(NILFS_I_BUSY, &ii->i_state);
831 spin_unlock(&sbi->s_inode_lock); 845 spin_unlock(&nilfs->ns_inode_lock);
832 } 846 }
833 return ret; 847 return ret;
834} 848}
835 849
836int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) 850int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
837{ 851{
838 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
839 struct nilfs_inode_info *ii = NILFS_I(inode); 852 struct nilfs_inode_info *ii = NILFS_I(inode);
853 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
840 854
841 atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); 855 atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
842 856
843 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) 857 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
844 return 0; 858 return 0;
845 859
846 spin_lock(&sbi->s_inode_lock); 860 spin_lock(&nilfs->ns_inode_lock);
847 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 861 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
848 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 862 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
849 /* Because this routine may race with nilfs_dispose_list(), 863 /* Because this routine may race with nilfs_dispose_list(),
@@ -851,18 +865,18 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
851 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { 865 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
852 /* This will happen when somebody is freeing 866 /* This will happen when somebody is freeing
853 this inode. */ 867 this inode. */
854 nilfs_warning(sbi->s_super, __func__, 868 nilfs_warning(inode->i_sb, __func__,
855 "cannot get inode (ino=%lu)\n", 869 "cannot get inode (ino=%lu)\n",
856 inode->i_ino); 870 inode->i_ino);
857 spin_unlock(&sbi->s_inode_lock); 871 spin_unlock(&nilfs->ns_inode_lock);
858 return -EINVAL; /* NILFS_I_DIRTY may remain for 872 return -EINVAL; /* NILFS_I_DIRTY may remain for
859 freeing inode */ 873 freeing inode */
860 } 874 }
861 list_del(&ii->i_dirty); 875 list_del(&ii->i_dirty);
862 list_add_tail(&ii->i_dirty, &sbi->s_dirty_files); 876 list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
863 set_bit(NILFS_I_QUEUED, &ii->i_state); 877 set_bit(NILFS_I_QUEUED, &ii->i_state);
864 } 878 }
865 spin_unlock(&sbi->s_inode_lock); 879 spin_unlock(&nilfs->ns_inode_lock);
866 return 0; 880 return 0;
867} 881}
868 882
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 496738963fdb..f2469ba6246b 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -26,7 +26,9 @@
26#include <linux/capability.h> /* capable() */ 26#include <linux/capability.h> /* capable() */
27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ 27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/compat.h> /* compat_ptr() */
29#include <linux/mount.h> /* mnt_want_write(), mnt_drop_write() */ 30#include <linux/mount.h> /* mnt_want_write(), mnt_drop_write() */
31#include <linux/buffer_head.h>
30#include <linux/nilfs2_fs.h> 32#include <linux/nilfs2_fs.h>
31#include "nilfs.h" 33#include "nilfs.h"
32#include "segment.h" 34#include "segment.h"
@@ -97,11 +99,74 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
97 return ret; 99 return ret;
98} 100}
99 101
102static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
103{
104 unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE;
105
106 return put_user(flags, (int __user *)argp);
107}
108
109static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
110 void __user *argp)
111{
112 struct nilfs_transaction_info ti;
113 unsigned int flags, oldflags;
114 int ret;
115
116 if (!inode_owner_or_capable(inode))
117 return -EACCES;
118
119 if (get_user(flags, (int __user *)argp))
120 return -EFAULT;
121
122 ret = mnt_want_write(filp->f_path.mnt);
123 if (ret)
124 return ret;
125
126 flags = nilfs_mask_flags(inode->i_mode, flags);
127
128 mutex_lock(&inode->i_mutex);
129
130 oldflags = NILFS_I(inode)->i_flags;
131
132 /*
133 * The IMMUTABLE and APPEND_ONLY flags can only be changed by the
134 * relevant capability.
135 */
136 ret = -EPERM;
137 if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) &&
138 !capable(CAP_LINUX_IMMUTABLE))
139 goto out;
140
141 ret = nilfs_transaction_begin(inode->i_sb, &ti, 0);
142 if (ret)
143 goto out;
144
145 NILFS_I(inode)->i_flags = (oldflags & ~FS_FL_USER_MODIFIABLE) |
146 (flags & FS_FL_USER_MODIFIABLE);
147
148 nilfs_set_inode_flags(inode);
149 inode->i_ctime = CURRENT_TIME;
150 if (IS_SYNC(inode))
151 nilfs_set_transaction_flag(NILFS_TI_SYNC);
152
153 nilfs_mark_inode_dirty(inode);
154 ret = nilfs_transaction_commit(inode->i_sb);
155out:
156 mutex_unlock(&inode->i_mutex);
157 mnt_drop_write(filp->f_path.mnt);
158 return ret;
159}
160
161static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
162{
163 return put_user(inode->i_generation, (int __user *)argp);
164}
165
100static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, 166static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
101 unsigned int cmd, void __user *argp) 167 unsigned int cmd, void __user *argp)
102{ 168{
103 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 169 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
104 struct inode *cpfile = nilfs->ns_cpfile;
105 struct nilfs_transaction_info ti; 170 struct nilfs_transaction_info ti;
106 struct nilfs_cpmode cpmode; 171 struct nilfs_cpmode cpmode;
107 int ret; 172 int ret;
@@ -121,7 +186,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
121 186
122 nilfs_transaction_begin(inode->i_sb, &ti, 0); 187 nilfs_transaction_begin(inode->i_sb, &ti, 0);
123 ret = nilfs_cpfile_change_cpmode( 188 ret = nilfs_cpfile_change_cpmode(
124 cpfile, cpmode.cm_cno, cpmode.cm_mode); 189 nilfs->ns_cpfile, cpmode.cm_cno, cpmode.cm_mode);
125 if (unlikely(ret < 0)) 190 if (unlikely(ret < 0))
126 nilfs_transaction_abort(inode->i_sb); 191 nilfs_transaction_abort(inode->i_sb);
127 else 192 else
@@ -137,7 +202,7 @@ static int
137nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, 202nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
138 unsigned int cmd, void __user *argp) 203 unsigned int cmd, void __user *argp)
139{ 204{
140 struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; 205 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
141 struct nilfs_transaction_info ti; 206 struct nilfs_transaction_info ti;
142 __u64 cno; 207 __u64 cno;
143 int ret; 208 int ret;
@@ -154,7 +219,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
154 goto out; 219 goto out;
155 220
156 nilfs_transaction_begin(inode->i_sb, &ti, 0); 221 nilfs_transaction_begin(inode->i_sb, &ti, 0);
157 ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); 222 ret = nilfs_cpfile_delete_checkpoint(nilfs->ns_cpfile, cno);
158 if (unlikely(ret < 0)) 223 if (unlikely(ret < 0))
159 nilfs_transaction_abort(inode->i_sb); 224 nilfs_transaction_abort(inode->i_sb);
160 else 225 else
@@ -180,7 +245,7 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
180static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, 245static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
181 unsigned int cmd, void __user *argp) 246 unsigned int cmd, void __user *argp)
182{ 247{
183 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 248 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
184 struct nilfs_cpstat cpstat; 249 struct nilfs_cpstat cpstat;
185 int ret; 250 int ret;
186 251
@@ -211,7 +276,7 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
211static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, 276static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
212 unsigned int cmd, void __user *argp) 277 unsigned int cmd, void __user *argp)
213{ 278{
214 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 279 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
215 struct nilfs_sustat sustat; 280 struct nilfs_sustat sustat;
216 int ret; 281 int ret;
217 282
@@ -267,7 +332,7 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
267static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, 332static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
268 unsigned int cmd, void __user *argp) 333 unsigned int cmd, void __user *argp)
269{ 334{
270 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 335 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
271 struct nilfs_argv argv; 336 struct nilfs_argv argv;
272 int ret; 337 int ret;
273 338
@@ -336,7 +401,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
336 struct nilfs_argv *argv, void *buf) 401 struct nilfs_argv *argv, void *buf)
337{ 402{
338 size_t nmembs = argv->v_nmembs; 403 size_t nmembs = argv->v_nmembs;
339 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; 404 struct the_nilfs *nilfs = sb->s_fs_info;
340 struct inode *inode; 405 struct inode *inode;
341 struct nilfs_vdesc *vdesc; 406 struct nilfs_vdesc *vdesc;
342 struct buffer_head *bh, *n; 407 struct buffer_head *bh, *n;
@@ -550,7 +615,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
550 ret = PTR_ERR(kbufs[4]); 615 ret = PTR_ERR(kbufs[4]);
551 goto out; 616 goto out;
552 } 617 }
553 nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 618 nilfs = inode->i_sb->s_fs_info;
554 619
555 for (n = 0; n < 4; n++) { 620 for (n = 0; n < 4; n++) {
556 ret = -EINVAL; 621 ret = -EINVAL;
@@ -623,7 +688,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
623 return ret; 688 return ret;
624 689
625 if (argp != NULL) { 690 if (argp != NULL) {
626 nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 691 nilfs = inode->i_sb->s_fs_info;
627 down_read(&nilfs->ns_segctor_sem); 692 down_read(&nilfs->ns_segctor_sem);
628 cno = nilfs->ns_cno - 1; 693 cno = nilfs->ns_cno - 1;
629 up_read(&nilfs->ns_segctor_sem); 694 up_read(&nilfs->ns_segctor_sem);
@@ -641,7 +706,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
641 void *, size_t, size_t)) 706 void *, size_t, size_t))
642 707
643{ 708{
644 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 709 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
645 struct nilfs_argv argv; 710 struct nilfs_argv argv;
646 int ret; 711 int ret;
647 712
@@ -666,6 +731,12 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
666 void __user *argp = (void __user *)arg; 731 void __user *argp = (void __user *)arg;
667 732
668 switch (cmd) { 733 switch (cmd) {
734 case FS_IOC_GETFLAGS:
735 return nilfs_ioctl_getflags(inode, argp);
736 case FS_IOC_SETFLAGS:
737 return nilfs_ioctl_setflags(inode, filp, argp);
738 case FS_IOC_GETVERSION:
739 return nilfs_ioctl_getversion(inode, argp);
669 case NILFS_IOCTL_CHANGE_CPMODE: 740 case NILFS_IOCTL_CHANGE_CPMODE:
670 return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp); 741 return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp);
671 case NILFS_IOCTL_DELETE_CHECKPOINT: 742 case NILFS_IOCTL_DELETE_CHECKPOINT:
@@ -696,3 +767,23 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
696 return -ENOTTY; 767 return -ENOTTY;
697 } 768 }
698} 769}
770
771#ifdef CONFIG_COMPAT
772long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
773{
774 switch (cmd) {
775 case FS_IOC32_GETFLAGS:
776 cmd = FS_IOC_GETFLAGS;
777 break;
778 case FS_IOC32_SETFLAGS:
779 cmd = FS_IOC_SETFLAGS;
780 break;
781 case FS_IOC32_GETVERSION:
782 cmd = FS_IOC_GETVERSION;
783 break;
784 default:
785 return -ENOIOCTLCMD;
786 }
787 return nilfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
788}
789#endif
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a0babd2bff6a..a649b05f7069 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -399,7 +399,6 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
399 399
400static const struct address_space_operations def_mdt_aops = { 400static const struct address_space_operations def_mdt_aops = {
401 .writepage = nilfs_mdt_write_page, 401 .writepage = nilfs_mdt_write_page,
402 .sync_page = block_sync_page,
403}; 402};
404 403
405static const struct inode_operations def_mdt_iops; 404static const struct inode_operations def_mdt_iops;
@@ -438,10 +437,6 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
438 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); 437 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
439} 438}
440 439
441static const struct address_space_operations shadow_map_aops = {
442 .sync_page = block_sync_page,
443};
444
445/** 440/**
446 * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file 441 * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
447 * @inode: inode of the metadata file 442 * @inode: inode of the metadata file
@@ -455,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
455 450
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 address_space_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 453 nilfs_mapping_init(&shadow->frozen_data, bdi);
459 address_space_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
461 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
462 return 0; 457 return 0;
463} 458}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index b13734bf3521..ed68563ec708 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -66,7 +66,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
66 66
67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) 67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
68{ 68{
69 return NILFS_SB(inode->i_sb)->s_nilfs; 69 return inode->i_sb->s_fs_info;
70} 70}
71 71
72/* Default GFP flags using highmem */ 72/* Default GFP flags using highmem */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 161791d26458..546849b3e88f 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -482,7 +482,7 @@ static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno,
482 if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO) 482 if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO)
483 return ERR_PTR(-ESTALE); 483 return ERR_PTR(-ESTALE);
484 484
485 root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); 485 root = nilfs_lookup_root(sb->s_fs_info, cno);
486 if (!root) 486 if (!root)
487 return ERR_PTR(-ESTALE); 487 return ERR_PTR(-ESTALE);
488 488
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 777e8fd04304..856e8e4e0b74 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -30,7 +30,6 @@
30#include <linux/blkdev.h> 30#include <linux/blkdev.h>
31#include <linux/nilfs2_fs.h> 31#include <linux/nilfs2_fs.h>
32#include "the_nilfs.h" 32#include "the_nilfs.h"
33#include "sb.h"
34#include "bmap.h" 33#include "bmap.h"
35 34
36/* 35/*
@@ -122,7 +121,7 @@ enum {
122#define NILFS_SYS_INO_BITS \ 121#define NILFS_SYS_INO_BITS \
123 ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) 122 ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
124 123
125#define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino) 124#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
126 125
127#define NILFS_MDT_INODE(sb, ino) \ 126#define NILFS_MDT_INODE(sb, ino) \
128 ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) 127 ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino))))
@@ -212,6 +211,23 @@ static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
212 211
213#define NILFS_ATIME_DISABLE 212#define NILFS_ATIME_DISABLE
214 213
214/* Flags that should be inherited by new inodes from their parent. */
215#define NILFS_FL_INHERITED \
216 (FS_SECRM_FL | FS_UNRM_FL | FS_COMPR_FL | FS_SYNC_FL | \
217 FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL |\
218 FS_COMPRBLK_FL | FS_NOCOMP_FL | FS_NOTAIL_FL | FS_DIRSYNC_FL)
219
220/* Mask out flags that are inappropriate for the given type of inode. */
221static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags)
222{
223 if (S_ISDIR(mode))
224 return flags;
225 else if (S_ISREG(mode))
226 return flags & ~(FS_DIRSYNC_FL | FS_TOPDIR_FL);
227 else
228 return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
229}
230
215/* dir.c */ 231/* dir.c */
216extern int nilfs_add_link(struct dentry *, struct inode *); 232extern int nilfs_add_link(struct dentry *, struct inode *);
217extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); 233extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
@@ -229,10 +245,13 @@ extern int nilfs_sync_file(struct file *, int);
229 245
230/* ioctl.c */ 246/* ioctl.c */
231long nilfs_ioctl(struct file *, unsigned int, unsigned long); 247long nilfs_ioctl(struct file *, unsigned int, unsigned long);
248long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
232int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, 249int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
233 void **); 250 void **);
234 251
235/* inode.c */ 252/* inode.c */
253void nilfs_inode_add_blocks(struct inode *inode, int n);
254void nilfs_inode_sub_blocks(struct inode *inode, int n);
236extern struct inode *nilfs_new_inode(struct inode *, int); 255extern struct inode *nilfs_new_inode(struct inode *, int);
237extern void nilfs_free_inode(struct inode *); 256extern void nilfs_free_inode(struct inode *);
238extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 257extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
@@ -275,11 +294,11 @@ extern int nilfs_check_feature_compatibility(struct super_block *,
275 struct nilfs_super_block *); 294 struct nilfs_super_block *);
276extern void nilfs_set_log_cursor(struct nilfs_super_block *, 295extern void nilfs_set_log_cursor(struct nilfs_super_block *,
277 struct the_nilfs *); 296 struct the_nilfs *);
278extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *, 297struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
279 int flip); 298 int flip);
280extern int nilfs_commit_super(struct nilfs_sb_info *, int); 299int nilfs_commit_super(struct super_block *sb, int flag);
281extern int nilfs_cleanup_super(struct nilfs_sb_info *); 300int nilfs_cleanup_super(struct super_block *sb);
282int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, 301int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
283 struct nilfs_root **root); 302 struct nilfs_root **root);
284int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); 303int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
285 304
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index a585b35fd6bc..4d2a1ee0eb47 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -493,15 +493,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
493} 493}
494 494
495void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
496 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi)
497 const struct address_space_operations *aops)
498{ 497{
499 mapping->host = NULL; 498 mapping->host = NULL;
500 mapping->flags = 0; 499 mapping->flags = 0;
501 mapping_set_gfp_mask(mapping, GFP_NOFS); 500 mapping_set_gfp_mask(mapping, GFP_NOFS);
502 mapping->assoc_mapping = NULL; 501 mapping->assoc_mapping = NULL;
503 mapping->backing_dev_info = bdi; 502 mapping->backing_dev_info = bdi;
504 mapping->a_ops = aops; 503 mapping->a_ops = NULL;
505} 504}
506 505
507/* 506/*
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 2a00953ebd5f..f06b79ad7493 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -62,8 +62,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
65 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi);
66 const struct address_space_operations *aops);
67unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
68unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 67unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
69 sector_t start_blk, 68 sector_t start_blk,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 3dfcd3b7d389..ba4a64518f38 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -425,7 +425,7 @@ void nilfs_dispose_segment_list(struct list_head *head)
425} 425}
426 426
427static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, 427static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
428 struct nilfs_sb_info *sbi, 428 struct super_block *sb,
429 struct nilfs_recovery_info *ri) 429 struct nilfs_recovery_info *ri)
430{ 430{
431 struct list_head *head = &ri->ri_used_segments; 431 struct list_head *head = &ri->ri_used_segments;
@@ -501,7 +501,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
501} 501}
502 502
503static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, 503static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
504 struct nilfs_sb_info *sbi, 504 struct super_block *sb,
505 struct nilfs_root *root, 505 struct nilfs_root *root,
506 struct list_head *head, 506 struct list_head *head,
507 unsigned long *nr_salvaged_blocks) 507 unsigned long *nr_salvaged_blocks)
@@ -514,7 +514,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
514 int err = 0, err2 = 0; 514 int err = 0, err2 = 0;
515 515
516 list_for_each_entry_safe(rb, n, head, list) { 516 list_for_each_entry_safe(rb, n, head, list) {
517 inode = nilfs_iget(sbi->s_super, root, rb->ino); 517 inode = nilfs_iget(sb, root, rb->ino);
518 if (IS_ERR(inode)) { 518 if (IS_ERR(inode)) {
519 err = PTR_ERR(inode); 519 err = PTR_ERR(inode);
520 inode = NULL; 520 inode = NULL;
@@ -572,11 +572,11 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
572 * nilfs_do_roll_forward - salvage logical segments newer than the latest 572 * nilfs_do_roll_forward - salvage logical segments newer than the latest
573 * checkpoint 573 * checkpoint
574 * @nilfs: nilfs object 574 * @nilfs: nilfs object
575 * @sbi: nilfs_sb_info 575 * @sb: super block instance
576 * @ri: pointer to a nilfs_recovery_info 576 * @ri: pointer to a nilfs_recovery_info
577 */ 577 */
578static int nilfs_do_roll_forward(struct the_nilfs *nilfs, 578static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
579 struct nilfs_sb_info *sbi, 579 struct super_block *sb,
580 struct nilfs_root *root, 580 struct nilfs_root *root,
581 struct nilfs_recovery_info *ri) 581 struct nilfs_recovery_info *ri)
582{ 582{
@@ -648,7 +648,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
648 goto failed; 648 goto failed;
649 if (flags & NILFS_SS_LOGEND) { 649 if (flags & NILFS_SS_LOGEND) {
650 err = nilfs_recover_dsync_blocks( 650 err = nilfs_recover_dsync_blocks(
651 nilfs, sbi, root, &dsync_blocks, 651 nilfs, sb, root, &dsync_blocks,
652 &nsalvaged_blocks); 652 &nsalvaged_blocks);
653 if (unlikely(err)) 653 if (unlikely(err))
654 goto failed; 654 goto failed;
@@ -681,7 +681,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
681 681
682 if (nsalvaged_blocks) { 682 if (nsalvaged_blocks) {
683 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n", 683 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
684 sbi->s_super->s_id, nsalvaged_blocks); 684 sb->s_id, nsalvaged_blocks);
685 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; 685 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
686 } 686 }
687 out: 687 out:
@@ -695,7 +695,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
695 printk(KERN_ERR 695 printk(KERN_ERR
696 "NILFS (device %s): Error roll-forwarding " 696 "NILFS (device %s): Error roll-forwarding "
697 "(err=%d, pseg block=%llu). ", 697 "(err=%d, pseg block=%llu). ",
698 sbi->s_super->s_id, err, (unsigned long long)pseg_start); 698 sb->s_id, err, (unsigned long long)pseg_start);
699 goto out; 699 goto out;
700} 700}
701 701
@@ -724,7 +724,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
724/** 724/**
725 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint 725 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
726 * @nilfs: nilfs object 726 * @nilfs: nilfs object
727 * @sbi: nilfs_sb_info 727 * @sb: super block instance
728 * @ri: pointer to a nilfs_recovery_info struct to store search results. 728 * @ri: pointer to a nilfs_recovery_info struct to store search results.
729 * 729 *
730 * Return Value: On success, 0 is returned. On error, one of the following 730 * Return Value: On success, 0 is returned. On error, one of the following
@@ -741,7 +741,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
741 * %-ENOMEM - Insufficient memory available. 741 * %-ENOMEM - Insufficient memory available.
742 */ 742 */
743int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, 743int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
744 struct nilfs_sb_info *sbi, 744 struct super_block *sb,
745 struct nilfs_recovery_info *ri) 745 struct nilfs_recovery_info *ri)
746{ 746{
747 struct nilfs_root *root; 747 struct nilfs_root *root;
@@ -750,32 +750,32 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
750 if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) 750 if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
751 return 0; 751 return 0;
752 752
753 err = nilfs_attach_checkpoint(sbi, ri->ri_cno, true, &root); 753 err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root);
754 if (unlikely(err)) { 754 if (unlikely(err)) {
755 printk(KERN_ERR 755 printk(KERN_ERR
756 "NILFS: error loading the latest checkpoint.\n"); 756 "NILFS: error loading the latest checkpoint.\n");
757 return err; 757 return err;
758 } 758 }
759 759
760 err = nilfs_do_roll_forward(nilfs, sbi, root, ri); 760 err = nilfs_do_roll_forward(nilfs, sb, root, ri);
761 if (unlikely(err)) 761 if (unlikely(err))
762 goto failed; 762 goto failed;
763 763
764 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { 764 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
765 err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri); 765 err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri);
766 if (unlikely(err)) { 766 if (unlikely(err)) {
767 printk(KERN_ERR "NILFS: Error preparing segments for " 767 printk(KERN_ERR "NILFS: Error preparing segments for "
768 "recovery.\n"); 768 "recovery.\n");
769 goto failed; 769 goto failed;
770 } 770 }
771 771
772 err = nilfs_attach_segment_constructor(sbi, root); 772 err = nilfs_attach_log_writer(sb, root);
773 if (unlikely(err)) 773 if (unlikely(err))
774 goto failed; 774 goto failed;
775 775
776 set_nilfs_discontinued(nilfs); 776 set_nilfs_discontinued(nilfs);
777 err = nilfs_construct_segment(sbi->s_super); 777 err = nilfs_construct_segment(sb);
778 nilfs_detach_segment_constructor(sbi); 778 nilfs_detach_log_writer(sb);
779 779
780 if (unlikely(err)) { 780 if (unlikely(err)) {
781 printk(KERN_ERR "NILFS: Oops! recovery failed. " 781 printk(KERN_ERR "NILFS: Oops! recovery failed. "
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
deleted file mode 100644
index 7a17715f215f..000000000000
--- a/fs/nilfs2/sb.h
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * sb.h - NILFS on-memory super block structure.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#ifndef _NILFS_SB
25#define _NILFS_SB
26
27#include <linux/types.h>
28#include <linux/fs.h>
29
30struct the_nilfs;
31struct nilfs_sc_info;
32
33/*
34 * NILFS super-block data in memory
35 */
36struct nilfs_sb_info {
37 /* Mount options */
38 unsigned long s_mount_opt;
39 uid_t s_resuid;
40 gid_t s_resgid;
41
42 unsigned long s_interval; /* construction interval */
43 unsigned long s_watermark; /* threshold of data amount
44 for the segment construction */
45
46 /* Fundamental members */
47 struct super_block *s_super; /* reverse pointer to super_block */
48 struct the_nilfs *s_nilfs;
49
50 /* Segment constructor */
51 struct list_head s_dirty_files; /* dirty files list */
52 struct nilfs_sc_info *s_sc_info; /* segment constructor info */
53 spinlock_t s_inode_lock; /* Lock for the nilfs inode.
54 It covers s_dirty_files list */
55
56 /* Inode allocator */
57 spinlock_t s_next_gen_lock;
58 u32 s_next_generation;
59};
60
61static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb)
62{
63 return sb->s_fs_info;
64}
65
66static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi)
67{
68 return sbi->s_sc_info;
69}
70
71/*
72 * Bit operations for the mount option
73 */
74#define nilfs_clear_opt(sbi, opt) \
75 do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
76#define nilfs_set_opt(sbi, opt) \
77 do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0)
78#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt)
79#define nilfs_write_opt(sbi, mask, opt) \
80 do { (sbi)->s_mount_opt = \
81 (((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) | \
82 NILFS_MOUNT_##opt); \
83 } while (0)
84
85#endif /* _NILFS_SB */
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 0f83e93935b2..2853ff20f85a 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -509,7 +509,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
509 * Last BIO is always sent through the following 509 * Last BIO is always sent through the following
510 * submission. 510 * submission.
511 */ 511 */
512 rw |= REQ_SYNC | REQ_UNPLUG; 512 rw |= REQ_SYNC;
513 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); 513 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
514 } 514 }
515 515
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 2de9f636792a..afe4f2183454 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -104,8 +104,7 @@ struct nilfs_sc_operations {
104static void nilfs_segctor_start_timer(struct nilfs_sc_info *); 104static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
105static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); 105static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
106static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); 106static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
107static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, 107static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
108 int);
109 108
110#define nilfs_cnt32_gt(a, b) \ 109#define nilfs_cnt32_gt(a, b) \
111 (typecheck(__u32, a) && typecheck(__u32, b) && \ 110 (typecheck(__u32, a) && typecheck(__u32, b) && \
@@ -182,7 +181,6 @@ int nilfs_transaction_begin(struct super_block *sb,
182 struct nilfs_transaction_info *ti, 181 struct nilfs_transaction_info *ti,
183 int vacancy_check) 182 int vacancy_check)
184{ 183{
185 struct nilfs_sb_info *sbi;
186 struct the_nilfs *nilfs; 184 struct the_nilfs *nilfs;
187 int ret = nilfs_prepare_segment_lock(ti); 185 int ret = nilfs_prepare_segment_lock(ti);
188 186
@@ -193,8 +191,7 @@ int nilfs_transaction_begin(struct super_block *sb,
193 191
194 vfs_check_frozen(sb, SB_FREEZE_WRITE); 192 vfs_check_frozen(sb, SB_FREEZE_WRITE);
195 193
196 sbi = NILFS_SB(sb); 194 nilfs = sb->s_fs_info;
197 nilfs = sbi->s_nilfs;
198 down_read(&nilfs->ns_segctor_sem); 195 down_read(&nilfs->ns_segctor_sem);
199 if (vacancy_check && nilfs_near_disk_full(nilfs)) { 196 if (vacancy_check && nilfs_near_disk_full(nilfs)) {
200 up_read(&nilfs->ns_segctor_sem); 197 up_read(&nilfs->ns_segctor_sem);
@@ -225,8 +222,7 @@ int nilfs_transaction_begin(struct super_block *sb,
225int nilfs_transaction_commit(struct super_block *sb) 222int nilfs_transaction_commit(struct super_block *sb)
226{ 223{
227 struct nilfs_transaction_info *ti = current->journal_info; 224 struct nilfs_transaction_info *ti = current->journal_info;
228 struct nilfs_sb_info *sbi; 225 struct the_nilfs *nilfs = sb->s_fs_info;
229 struct nilfs_sc_info *sci;
230 int err = 0; 226 int err = 0;
231 227
232 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 228 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
@@ -235,16 +231,15 @@ int nilfs_transaction_commit(struct super_block *sb)
235 ti->ti_count--; 231 ti->ti_count--;
236 return 0; 232 return 0;
237 } 233 }
238 sbi = NILFS_SB(sb); 234 if (nilfs->ns_writer) {
239 sci = NILFS_SC(sbi); 235 struct nilfs_sc_info *sci = nilfs->ns_writer;
240 if (sci != NULL) { 236
241 if (ti->ti_flags & NILFS_TI_COMMIT) 237 if (ti->ti_flags & NILFS_TI_COMMIT)
242 nilfs_segctor_start_timer(sci); 238 nilfs_segctor_start_timer(sci);
243 if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) > 239 if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
244 sci->sc_watermark)
245 nilfs_segctor_do_flush(sci, 0); 240 nilfs_segctor_do_flush(sci, 0);
246 } 241 }
247 up_read(&sbi->s_nilfs->ns_segctor_sem); 242 up_read(&nilfs->ns_segctor_sem);
248 current->journal_info = ti->ti_save; 243 current->journal_info = ti->ti_save;
249 244
250 if (ti->ti_flags & NILFS_TI_SYNC) 245 if (ti->ti_flags & NILFS_TI_SYNC)
@@ -257,13 +252,14 @@ int nilfs_transaction_commit(struct super_block *sb)
257void nilfs_transaction_abort(struct super_block *sb) 252void nilfs_transaction_abort(struct super_block *sb)
258{ 253{
259 struct nilfs_transaction_info *ti = current->journal_info; 254 struct nilfs_transaction_info *ti = current->journal_info;
255 struct the_nilfs *nilfs = sb->s_fs_info;
260 256
261 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 257 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
262 if (ti->ti_count > 0) { 258 if (ti->ti_count > 0) {
263 ti->ti_count--; 259 ti->ti_count--;
264 return; 260 return;
265 } 261 }
266 up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); 262 up_read(&nilfs->ns_segctor_sem);
267 263
268 current->journal_info = ti->ti_save; 264 current->journal_info = ti->ti_save;
269 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 265 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
@@ -272,9 +268,8 @@ void nilfs_transaction_abort(struct super_block *sb)
272 268
273void nilfs_relax_pressure_in_lock(struct super_block *sb) 269void nilfs_relax_pressure_in_lock(struct super_block *sb)
274{ 270{
275 struct nilfs_sb_info *sbi = NILFS_SB(sb); 271 struct the_nilfs *nilfs = sb->s_fs_info;
276 struct nilfs_sc_info *sci = NILFS_SC(sbi); 272 struct nilfs_sc_info *sci = nilfs->ns_writer;
277 struct the_nilfs *nilfs = sbi->s_nilfs;
278 273
279 if (!sci || !sci->sc_flush_request) 274 if (!sci || !sci->sc_flush_request)
280 return; 275 return;
@@ -294,11 +289,13 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb)
294 downgrade_write(&nilfs->ns_segctor_sem); 289 downgrade_write(&nilfs->ns_segctor_sem);
295} 290}
296 291
297static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, 292static void nilfs_transaction_lock(struct super_block *sb,
298 struct nilfs_transaction_info *ti, 293 struct nilfs_transaction_info *ti,
299 int gcflag) 294 int gcflag)
300{ 295{
301 struct nilfs_transaction_info *cur_ti = current->journal_info; 296 struct nilfs_transaction_info *cur_ti = current->journal_info;
297 struct the_nilfs *nilfs = sb->s_fs_info;
298 struct nilfs_sc_info *sci = nilfs->ns_writer;
302 299
303 WARN_ON(cur_ti); 300 WARN_ON(cur_ti);
304 ti->ti_flags = NILFS_TI_WRITER; 301 ti->ti_flags = NILFS_TI_WRITER;
@@ -309,30 +306,31 @@ static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
309 current->journal_info = ti; 306 current->journal_info = ti;
310 307
311 for (;;) { 308 for (;;) {
312 down_write(&sbi->s_nilfs->ns_segctor_sem); 309 down_write(&nilfs->ns_segctor_sem);
313 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags)) 310 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
314 break; 311 break;
315 312
316 nilfs_segctor_do_immediate_flush(NILFS_SC(sbi)); 313 nilfs_segctor_do_immediate_flush(sci);
317 314
318 up_write(&sbi->s_nilfs->ns_segctor_sem); 315 up_write(&nilfs->ns_segctor_sem);
319 yield(); 316 yield();
320 } 317 }
321 if (gcflag) 318 if (gcflag)
322 ti->ti_flags |= NILFS_TI_GC; 319 ti->ti_flags |= NILFS_TI_GC;
323} 320}
324 321
325static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi) 322static void nilfs_transaction_unlock(struct super_block *sb)
326{ 323{
327 struct nilfs_transaction_info *ti = current->journal_info; 324 struct nilfs_transaction_info *ti = current->journal_info;
325 struct the_nilfs *nilfs = sb->s_fs_info;
328 326
329 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 327 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
330 BUG_ON(ti->ti_count > 0); 328 BUG_ON(ti->ti_count > 0);
331 329
332 up_write(&sbi->s_nilfs->ns_segctor_sem); 330 up_write(&nilfs->ns_segctor_sem);
333 current->journal_info = ti->ti_save; 331 current->journal_info = ti->ti_save;
334 if (!list_empty(&ti->ti_garbage)) 332 if (!list_empty(&ti->ti_garbage))
335 nilfs_dispose_list(sbi, &ti->ti_garbage, 0); 333 nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
336} 334}
337 335
338static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, 336static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
@@ -714,7 +712,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
714 } 712 }
715} 713}
716 714
717static void nilfs_dispose_list(struct nilfs_sb_info *sbi, 715static void nilfs_dispose_list(struct the_nilfs *nilfs,
718 struct list_head *head, int force) 716 struct list_head *head, int force)
719{ 717{
720 struct nilfs_inode_info *ii, *n; 718 struct nilfs_inode_info *ii, *n;
@@ -722,7 +720,7 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
722 unsigned nv = 0; 720 unsigned nv = 0;
723 721
724 while (!list_empty(head)) { 722 while (!list_empty(head)) {
725 spin_lock(&sbi->s_inode_lock); 723 spin_lock(&nilfs->ns_inode_lock);
726 list_for_each_entry_safe(ii, n, head, i_dirty) { 724 list_for_each_entry_safe(ii, n, head, i_dirty) {
727 list_del_init(&ii->i_dirty); 725 list_del_init(&ii->i_dirty);
728 if (force) { 726 if (force) {
@@ -733,14 +731,14 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
733 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { 731 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
734 set_bit(NILFS_I_QUEUED, &ii->i_state); 732 set_bit(NILFS_I_QUEUED, &ii->i_state);
735 list_add_tail(&ii->i_dirty, 733 list_add_tail(&ii->i_dirty,
736 &sbi->s_dirty_files); 734 &nilfs->ns_dirty_files);
737 continue; 735 continue;
738 } 736 }
739 ivec[nv++] = ii; 737 ivec[nv++] = ii;
740 if (nv == SC_N_INODEVEC) 738 if (nv == SC_N_INODEVEC)
741 break; 739 break;
742 } 740 }
743 spin_unlock(&sbi->s_inode_lock); 741 spin_unlock(&nilfs->ns_inode_lock);
744 742
745 for (pii = ivec; nv > 0; pii++, nv--) 743 for (pii = ivec; nv > 0; pii++, nv--)
746 iput(&(*pii)->vfs_inode); 744 iput(&(*pii)->vfs_inode);
@@ -773,24 +771,23 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
773 771
774static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) 772static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
775{ 773{
776 struct nilfs_sb_info *sbi = sci->sc_sbi; 774 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
777 int ret = 0; 775 int ret = 0;
778 776
779 if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root)) 777 if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
780 set_bit(NILFS_SC_DIRTY, &sci->sc_flags); 778 set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
781 779
782 spin_lock(&sbi->s_inode_lock); 780 spin_lock(&nilfs->ns_inode_lock);
783 if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) 781 if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
784 ret++; 782 ret++;
785 783
786 spin_unlock(&sbi->s_inode_lock); 784 spin_unlock(&nilfs->ns_inode_lock);
787 return ret; 785 return ret;
788} 786}
789 787
790static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) 788static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
791{ 789{
792 struct nilfs_sb_info *sbi = sci->sc_sbi; 790 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
793 struct the_nilfs *nilfs = sbi->s_nilfs;
794 791
795 nilfs_mdt_clear_dirty(sci->sc_root->ifile); 792 nilfs_mdt_clear_dirty(sci->sc_root->ifile);
796 nilfs_mdt_clear_dirty(nilfs->ns_cpfile); 793 nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
@@ -800,7 +797,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
800 797
801static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) 798static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
802{ 799{
803 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 800 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
804 struct buffer_head *bh_cp; 801 struct buffer_head *bh_cp;
805 struct nilfs_checkpoint *raw_cp; 802 struct nilfs_checkpoint *raw_cp;
806 int err; 803 int err;
@@ -824,8 +821,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
824 821
825static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) 822static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
826{ 823{
827 struct nilfs_sb_info *sbi = sci->sc_sbi; 824 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
828 struct the_nilfs *nilfs = sbi->s_nilfs;
829 struct buffer_head *bh_cp; 825 struct buffer_head *bh_cp;
830 struct nilfs_checkpoint *raw_cp; 826 struct nilfs_checkpoint *raw_cp;
831 int err; 827 int err;
@@ -1049,8 +1045,7 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
1049 1045
1050static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) 1046static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1051{ 1047{
1052 struct nilfs_sb_info *sbi = sci->sc_sbi; 1048 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1053 struct the_nilfs *nilfs = sbi->s_nilfs;
1054 struct list_head *head; 1049 struct list_head *head;
1055 struct nilfs_inode_info *ii; 1050 struct nilfs_inode_info *ii;
1056 size_t ndone; 1051 size_t ndone;
@@ -1859,7 +1854,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1859{ 1854{
1860 struct nilfs_segment_buffer *segbuf; 1855 struct nilfs_segment_buffer *segbuf;
1861 struct page *bd_page = NULL, *fs_page = NULL; 1856 struct page *bd_page = NULL, *fs_page = NULL;
1862 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 1857 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1863 int update_sr = false; 1858 int update_sr = false;
1864 1859
1865 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { 1860 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
@@ -1963,30 +1958,30 @@ static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
1963 return ret; 1958 return ret;
1964} 1959}
1965 1960
1966static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, 1961static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1967 struct nilfs_sb_info *sbi) 1962 struct the_nilfs *nilfs)
1968{ 1963{
1969 struct nilfs_inode_info *ii, *n; 1964 struct nilfs_inode_info *ii, *n;
1970 struct inode *ifile = sci->sc_root->ifile; 1965 struct inode *ifile = sci->sc_root->ifile;
1971 1966
1972 spin_lock(&sbi->s_inode_lock); 1967 spin_lock(&nilfs->ns_inode_lock);
1973 retry: 1968 retry:
1974 list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { 1969 list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
1975 if (!ii->i_bh) { 1970 if (!ii->i_bh) {
1976 struct buffer_head *ibh; 1971 struct buffer_head *ibh;
1977 int err; 1972 int err;
1978 1973
1979 spin_unlock(&sbi->s_inode_lock); 1974 spin_unlock(&nilfs->ns_inode_lock);
1980 err = nilfs_ifile_get_inode_block( 1975 err = nilfs_ifile_get_inode_block(
1981 ifile, ii->vfs_inode.i_ino, &ibh); 1976 ifile, ii->vfs_inode.i_ino, &ibh);
1982 if (unlikely(err)) { 1977 if (unlikely(err)) {
1983 nilfs_warning(sbi->s_super, __func__, 1978 nilfs_warning(sci->sc_super, __func__,
1984 "failed to get inode block.\n"); 1979 "failed to get inode block.\n");
1985 return err; 1980 return err;
1986 } 1981 }
1987 nilfs_mdt_mark_buffer_dirty(ibh); 1982 nilfs_mdt_mark_buffer_dirty(ibh);
1988 nilfs_mdt_mark_dirty(ifile); 1983 nilfs_mdt_mark_dirty(ifile);
1989 spin_lock(&sbi->s_inode_lock); 1984 spin_lock(&nilfs->ns_inode_lock);
1990 if (likely(!ii->i_bh)) 1985 if (likely(!ii->i_bh))
1991 ii->i_bh = ibh; 1986 ii->i_bh = ibh;
1992 else 1987 else
@@ -1999,18 +1994,18 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
1999 list_del(&ii->i_dirty); 1994 list_del(&ii->i_dirty);
2000 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); 1995 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
2001 } 1996 }
2002 spin_unlock(&sbi->s_inode_lock); 1997 spin_unlock(&nilfs->ns_inode_lock);
2003 1998
2004 return 0; 1999 return 0;
2005} 2000}
2006 2001
2007static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, 2002static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2008 struct nilfs_sb_info *sbi) 2003 struct the_nilfs *nilfs)
2009{ 2004{
2010 struct nilfs_transaction_info *ti = current->journal_info; 2005 struct nilfs_transaction_info *ti = current->journal_info;
2011 struct nilfs_inode_info *ii, *n; 2006 struct nilfs_inode_info *ii, *n;
2012 2007
2013 spin_lock(&sbi->s_inode_lock); 2008 spin_lock(&nilfs->ns_inode_lock);
2014 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { 2009 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
2015 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || 2010 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
2016 test_bit(NILFS_I_DIRTY, &ii->i_state)) 2011 test_bit(NILFS_I_DIRTY, &ii->i_state))
@@ -2022,7 +2017,7 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
2022 list_del(&ii->i_dirty); 2017 list_del(&ii->i_dirty);
2023 list_add_tail(&ii->i_dirty, &ti->ti_garbage); 2018 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2024 } 2019 }
2025 spin_unlock(&sbi->s_inode_lock); 2020 spin_unlock(&nilfs->ns_inode_lock);
2026} 2021}
2027 2022
2028/* 2023/*
@@ -2030,15 +2025,14 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
2030 */ 2025 */
2031static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 2026static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2032{ 2027{
2033 struct nilfs_sb_info *sbi = sci->sc_sbi; 2028 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2034 struct the_nilfs *nilfs = sbi->s_nilfs;
2035 struct page *failed_page; 2029 struct page *failed_page;
2036 int err; 2030 int err;
2037 2031
2038 sci->sc_stage.scnt = NILFS_ST_INIT; 2032 sci->sc_stage.scnt = NILFS_ST_INIT;
2039 sci->sc_cno = nilfs->ns_cno; 2033 sci->sc_cno = nilfs->ns_cno;
2040 2034
2041 err = nilfs_segctor_check_in_files(sci, sbi); 2035 err = nilfs_segctor_collect_dirty_files(sci, nilfs);
2042 if (unlikely(err)) 2036 if (unlikely(err))
2043 goto out; 2037 goto out;
2044 2038
@@ -2116,7 +2110,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2116 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2110 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2117 2111
2118 out: 2112 out:
2119 nilfs_segctor_check_out_files(sci, sbi); 2113 nilfs_segctor_drop_written_files(sci, nilfs);
2120 return err; 2114 return err;
2121 2115
2122 failed_to_write: 2116 failed_to_write:
@@ -2169,8 +2163,8 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2169 */ 2163 */
2170void nilfs_flush_segment(struct super_block *sb, ino_t ino) 2164void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2171{ 2165{
2172 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2166 struct the_nilfs *nilfs = sb->s_fs_info;
2173 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2167 struct nilfs_sc_info *sci = nilfs->ns_writer;
2174 2168
2175 if (!sci || nilfs_doing_construction()) 2169 if (!sci || nilfs_doing_construction())
2176 return; 2170 return;
@@ -2259,8 +2253,8 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2259 */ 2253 */
2260int nilfs_construct_segment(struct super_block *sb) 2254int nilfs_construct_segment(struct super_block *sb)
2261{ 2255{
2262 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2256 struct the_nilfs *nilfs = sb->s_fs_info;
2263 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2257 struct nilfs_sc_info *sci = nilfs->ns_writer;
2264 struct nilfs_transaction_info *ti; 2258 struct nilfs_transaction_info *ti;
2265 int err; 2259 int err;
2266 2260
@@ -2297,8 +2291,8 @@ int nilfs_construct_segment(struct super_block *sb)
2297int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, 2291int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2298 loff_t start, loff_t end) 2292 loff_t start, loff_t end)
2299{ 2293{
2300 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2294 struct the_nilfs *nilfs = sb->s_fs_info;
2301 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2295 struct nilfs_sc_info *sci = nilfs->ns_writer;
2302 struct nilfs_inode_info *ii; 2296 struct nilfs_inode_info *ii;
2303 struct nilfs_transaction_info ti; 2297 struct nilfs_transaction_info ti;
2304 int err = 0; 2298 int err = 0;
@@ -2306,33 +2300,33 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2306 if (!sci) 2300 if (!sci)
2307 return -EROFS; 2301 return -EROFS;
2308 2302
2309 nilfs_transaction_lock(sbi, &ti, 0); 2303 nilfs_transaction_lock(sb, &ti, 0);
2310 2304
2311 ii = NILFS_I(inode); 2305 ii = NILFS_I(inode);
2312 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || 2306 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
2313 nilfs_test_opt(sbi, STRICT_ORDER) || 2307 nilfs_test_opt(nilfs, STRICT_ORDER) ||
2314 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2308 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2315 nilfs_discontinued(sbi->s_nilfs)) { 2309 nilfs_discontinued(nilfs)) {
2316 nilfs_transaction_unlock(sbi); 2310 nilfs_transaction_unlock(sb);
2317 err = nilfs_segctor_sync(sci); 2311 err = nilfs_segctor_sync(sci);
2318 return err; 2312 return err;
2319 } 2313 }
2320 2314
2321 spin_lock(&sbi->s_inode_lock); 2315 spin_lock(&nilfs->ns_inode_lock);
2322 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 2316 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2323 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 2317 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2324 spin_unlock(&sbi->s_inode_lock); 2318 spin_unlock(&nilfs->ns_inode_lock);
2325 nilfs_transaction_unlock(sbi); 2319 nilfs_transaction_unlock(sb);
2326 return 0; 2320 return 0;
2327 } 2321 }
2328 spin_unlock(&sbi->s_inode_lock); 2322 spin_unlock(&nilfs->ns_inode_lock);
2329 sci->sc_dsync_inode = ii; 2323 sci->sc_dsync_inode = ii;
2330 sci->sc_dsync_start = start; 2324 sci->sc_dsync_start = start;
2331 sci->sc_dsync_end = end; 2325 sci->sc_dsync_end = end;
2332 2326
2333 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); 2327 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2334 2328
2335 nilfs_transaction_unlock(sbi); 2329 nilfs_transaction_unlock(sb);
2336 return err; 2330 return err;
2337} 2331}
2338 2332
@@ -2388,8 +2382,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2388 */ 2382 */
2389static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) 2383static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2390{ 2384{
2391 struct nilfs_sb_info *sbi = sci->sc_sbi; 2385 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2392 struct the_nilfs *nilfs = sbi->s_nilfs;
2393 struct nilfs_super_block **sbp; 2386 struct nilfs_super_block **sbp;
2394 int err = 0; 2387 int err = 0;
2395 2388
@@ -2407,11 +2400,12 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2407 nilfs_discontinued(nilfs)) { 2400 nilfs_discontinued(nilfs)) {
2408 down_write(&nilfs->ns_sem); 2401 down_write(&nilfs->ns_sem);
2409 err = -EIO; 2402 err = -EIO;
2410 sbp = nilfs_prepare_super(sbi, 2403 sbp = nilfs_prepare_super(sci->sc_super,
2411 nilfs_sb_will_flip(nilfs)); 2404 nilfs_sb_will_flip(nilfs));
2412 if (likely(sbp)) { 2405 if (likely(sbp)) {
2413 nilfs_set_log_cursor(sbp[0], nilfs); 2406 nilfs_set_log_cursor(sbp[0], nilfs);
2414 err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); 2407 err = nilfs_commit_super(sci->sc_super,
2408 NILFS_SB_COMMIT);
2415 } 2409 }
2416 up_write(&nilfs->ns_sem); 2410 up_write(&nilfs->ns_sem);
2417 } 2411 }
@@ -2443,16 +2437,15 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2443int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, 2437int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2444 void **kbufs) 2438 void **kbufs)
2445{ 2439{
2446 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2440 struct the_nilfs *nilfs = sb->s_fs_info;
2447 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2441 struct nilfs_sc_info *sci = nilfs->ns_writer;
2448 struct the_nilfs *nilfs = sbi->s_nilfs;
2449 struct nilfs_transaction_info ti; 2442 struct nilfs_transaction_info ti;
2450 int err; 2443 int err;
2451 2444
2452 if (unlikely(!sci)) 2445 if (unlikely(!sci))
2453 return -EROFS; 2446 return -EROFS;
2454 2447
2455 nilfs_transaction_lock(sbi, &ti, 1); 2448 nilfs_transaction_lock(sb, &ti, 1);
2456 2449
2457 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); 2450 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
2458 if (unlikely(err)) 2451 if (unlikely(err))
@@ -2480,14 +2473,14 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2480 set_current_state(TASK_INTERRUPTIBLE); 2473 set_current_state(TASK_INTERRUPTIBLE);
2481 schedule_timeout(sci->sc_interval); 2474 schedule_timeout(sci->sc_interval);
2482 } 2475 }
2483 if (nilfs_test_opt(sbi, DISCARD)) { 2476 if (nilfs_test_opt(nilfs, DISCARD)) {
2484 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, 2477 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
2485 sci->sc_nfreesegs); 2478 sci->sc_nfreesegs);
2486 if (ret) { 2479 if (ret) {
2487 printk(KERN_WARNING 2480 printk(KERN_WARNING
2488 "NILFS warning: error %d on discard request, " 2481 "NILFS warning: error %d on discard request, "
2489 "turning discards off for the device\n", ret); 2482 "turning discards off for the device\n", ret);
2490 nilfs_clear_opt(sbi, DISCARD); 2483 nilfs_clear_opt(nilfs, DISCARD);
2491 } 2484 }
2492 } 2485 }
2493 2486
@@ -2495,16 +2488,15 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2495 sci->sc_freesegs = NULL; 2488 sci->sc_freesegs = NULL;
2496 sci->sc_nfreesegs = 0; 2489 sci->sc_nfreesegs = 0;
2497 nilfs_mdt_clear_shadow_map(nilfs->ns_dat); 2490 nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
2498 nilfs_transaction_unlock(sbi); 2491 nilfs_transaction_unlock(sb);
2499 return err; 2492 return err;
2500} 2493}
2501 2494
2502static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) 2495static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2503{ 2496{
2504 struct nilfs_sb_info *sbi = sci->sc_sbi;
2505 struct nilfs_transaction_info ti; 2497 struct nilfs_transaction_info ti;
2506 2498
2507 nilfs_transaction_lock(sbi, &ti, 0); 2499 nilfs_transaction_lock(sci->sc_super, &ti, 0);
2508 nilfs_segctor_construct(sci, mode); 2500 nilfs_segctor_construct(sci, mode);
2509 2501
2510 /* 2502 /*
@@ -2515,7 +2507,7 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2515 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) 2507 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2516 nilfs_segctor_start_timer(sci); 2508 nilfs_segctor_start_timer(sci);
2517 2509
2518 nilfs_transaction_unlock(sbi); 2510 nilfs_transaction_unlock(sci->sc_super);
2519} 2511}
2520 2512
2521static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2513static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
@@ -2561,7 +2553,7 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2561static int nilfs_segctor_thread(void *arg) 2553static int nilfs_segctor_thread(void *arg)
2562{ 2554{
2563 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2555 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2564 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 2556 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2565 int timeout = 0; 2557 int timeout = 0;
2566 2558
2567 sci->sc_timer.data = (unsigned long)current; 2559 sci->sc_timer.data = (unsigned long)current;
@@ -2672,17 +2664,17 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2672/* 2664/*
2673 * Setup & clean-up functions 2665 * Setup & clean-up functions
2674 */ 2666 */
2675static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, 2667static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2676 struct nilfs_root *root) 2668 struct nilfs_root *root)
2677{ 2669{
2670 struct the_nilfs *nilfs = sb->s_fs_info;
2678 struct nilfs_sc_info *sci; 2671 struct nilfs_sc_info *sci;
2679 2672
2680 sci = kzalloc(sizeof(*sci), GFP_KERNEL); 2673 sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2681 if (!sci) 2674 if (!sci)
2682 return NULL; 2675 return NULL;
2683 2676
2684 sci->sc_sbi = sbi; 2677 sci->sc_super = sb;
2685 sci->sc_super = sbi->s_super;
2686 2678
2687 nilfs_get_root(root); 2679 nilfs_get_root(root);
2688 sci->sc_root = root; 2680 sci->sc_root = root;
@@ -2702,10 +2694,10 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi,
2702 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; 2694 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
2703 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2695 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2704 2696
2705 if (sbi->s_interval) 2697 if (nilfs->ns_interval)
2706 sci->sc_interval = sbi->s_interval; 2698 sci->sc_interval = nilfs->ns_interval;
2707 if (sbi->s_watermark) 2699 if (nilfs->ns_watermark)
2708 sci->sc_watermark = sbi->s_watermark; 2700 sci->sc_watermark = nilfs->ns_watermark;
2709 return sci; 2701 return sci;
2710} 2702}
2711 2703
@@ -2716,12 +2708,11 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2716 /* The segctord thread was stopped and its timer was removed. 2708 /* The segctord thread was stopped and its timer was removed.
2717 But some tasks remain. */ 2709 But some tasks remain. */
2718 do { 2710 do {
2719 struct nilfs_sb_info *sbi = sci->sc_sbi;
2720 struct nilfs_transaction_info ti; 2711 struct nilfs_transaction_info ti;
2721 2712
2722 nilfs_transaction_lock(sbi, &ti, 0); 2713 nilfs_transaction_lock(sci->sc_super, &ti, 0);
2723 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2714 ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2724 nilfs_transaction_unlock(sbi); 2715 nilfs_transaction_unlock(sci->sc_super);
2725 2716
2726 } while (ret && retrycount-- > 0); 2717 } while (ret && retrycount-- > 0);
2727} 2718}
@@ -2736,10 +2727,10 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2736 */ 2727 */
2737static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) 2728static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2738{ 2729{
2739 struct nilfs_sb_info *sbi = sci->sc_sbi; 2730 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2740 int flag; 2731 int flag;
2741 2732
2742 up_write(&sbi->s_nilfs->ns_segctor_sem); 2733 up_write(&nilfs->ns_segctor_sem);
2743 2734
2744 spin_lock(&sci->sc_state_lock); 2735 spin_lock(&sci->sc_state_lock);
2745 nilfs_segctor_kill_thread(sci); 2736 nilfs_segctor_kill_thread(sci);
@@ -2753,9 +2744,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2753 WARN_ON(!list_empty(&sci->sc_copied_buffers)); 2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2754 2745
2755 if (!list_empty(&sci->sc_dirty_files)) { 2746 if (!list_empty(&sci->sc_dirty_files)) {
2756 nilfs_warning(sbi->s_super, __func__, 2747 nilfs_warning(sci->sc_super, __func__,
2757 "dirty file(s) after the final construction\n"); 2748 "dirty file(s) after the final construction\n");
2758 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); 2749 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2759 } 2750 }
2760 2751
2761 WARN_ON(!list_empty(&sci->sc_segbufs)); 2752 WARN_ON(!list_empty(&sci->sc_segbufs));
@@ -2763,79 +2754,78 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2763 2754
2764 nilfs_put_root(sci->sc_root); 2755 nilfs_put_root(sci->sc_root);
2765 2756
2766 down_write(&sbi->s_nilfs->ns_segctor_sem); 2757 down_write(&nilfs->ns_segctor_sem);
2767 2758
2768 del_timer_sync(&sci->sc_timer); 2759 del_timer_sync(&sci->sc_timer);
2769 kfree(sci); 2760 kfree(sci);
2770} 2761}
2771 2762
2772/** 2763/**
2773 * nilfs_attach_segment_constructor - attach a segment constructor 2764 * nilfs_attach_log_writer - attach log writer
2774 * @sbi: nilfs_sb_info 2765 * @sb: super block instance
2775 * @root: root object of the current filesystem tree 2766 * @root: root object of the current filesystem tree
2776 * 2767 *
2777 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, 2768 * This allocates a log writer object, initializes it, and starts the
2778 * initializes it, and starts the segment constructor. 2769 * log writer.
2779 * 2770 *
2780 * Return Value: On success, 0 is returned. On error, one of the following 2771 * Return Value: On success, 0 is returned. On error, one of the following
2781 * negative error code is returned. 2772 * negative error code is returned.
2782 * 2773 *
2783 * %-ENOMEM - Insufficient memory available. 2774 * %-ENOMEM - Insufficient memory available.
2784 */ 2775 */
2785int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, 2776int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
2786 struct nilfs_root *root)
2787{ 2777{
2778 struct the_nilfs *nilfs = sb->s_fs_info;
2788 int err; 2779 int err;
2789 2780
2790 if (NILFS_SC(sbi)) { 2781 if (nilfs->ns_writer) {
2791 /* 2782 /*
2792 * This happens if the filesystem was remounted 2783 * This happens if the filesystem was remounted
2793 * read/write after nilfs_error degenerated it into a 2784 * read/write after nilfs_error degenerated it into a
2794 * read-only mount. 2785 * read-only mount.
2795 */ 2786 */
2796 nilfs_detach_segment_constructor(sbi); 2787 nilfs_detach_log_writer(sb);
2797 } 2788 }
2798 2789
2799 sbi->s_sc_info = nilfs_segctor_new(sbi, root); 2790 nilfs->ns_writer = nilfs_segctor_new(sb, root);
2800 if (!sbi->s_sc_info) 2791 if (!nilfs->ns_writer)
2801 return -ENOMEM; 2792 return -ENOMEM;
2802 2793
2803 err = nilfs_segctor_start_thread(NILFS_SC(sbi)); 2794 err = nilfs_segctor_start_thread(nilfs->ns_writer);
2804 if (err) { 2795 if (err) {
2805 kfree(sbi->s_sc_info); 2796 kfree(nilfs->ns_writer);
2806 sbi->s_sc_info = NULL; 2797 nilfs->ns_writer = NULL;
2807 } 2798 }
2808 return err; 2799 return err;
2809} 2800}
2810 2801
2811/** 2802/**
2812 * nilfs_detach_segment_constructor - destroy the segment constructor 2803 * nilfs_detach_log_writer - destroy log writer
2813 * @sbi: nilfs_sb_info 2804 * @sb: super block instance
2814 * 2805 *
2815 * nilfs_detach_segment_constructor() kills the segment constructor daemon, 2806 * This kills log writer daemon, frees the log writer object, and
2816 * frees the struct nilfs_sc_info, and destroy the dirty file list. 2807 * destroys list of dirty files.
2817 */ 2808 */
2818void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) 2809void nilfs_detach_log_writer(struct super_block *sb)
2819{ 2810{
2820 struct the_nilfs *nilfs = sbi->s_nilfs; 2811 struct the_nilfs *nilfs = sb->s_fs_info;
2821 LIST_HEAD(garbage_list); 2812 LIST_HEAD(garbage_list);
2822 2813
2823 down_write(&nilfs->ns_segctor_sem); 2814 down_write(&nilfs->ns_segctor_sem);
2824 if (NILFS_SC(sbi)) { 2815 if (nilfs->ns_writer) {
2825 nilfs_segctor_destroy(NILFS_SC(sbi)); 2816 nilfs_segctor_destroy(nilfs->ns_writer);
2826 sbi->s_sc_info = NULL; 2817 nilfs->ns_writer = NULL;
2827 } 2818 }
2828 2819
2829 /* Force to free the list of dirty files */ 2820 /* Force to free the list of dirty files */
2830 spin_lock(&sbi->s_inode_lock); 2821 spin_lock(&nilfs->ns_inode_lock);
2831 if (!list_empty(&sbi->s_dirty_files)) { 2822 if (!list_empty(&nilfs->ns_dirty_files)) {
2832 list_splice_init(&sbi->s_dirty_files, &garbage_list); 2823 list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
2833 nilfs_warning(sbi->s_super, __func__, 2824 nilfs_warning(sb, __func__,
2834 "Non empty dirty list after the last " 2825 "Hit dirty file after stopped log writer\n");
2835 "segment construction\n"); 2826 }
2836 } 2827 spin_unlock(&nilfs->ns_inode_lock);
2837 spin_unlock(&sbi->s_inode_lock);
2838 up_write(&nilfs->ns_segctor_sem); 2828 up_write(&nilfs->ns_segctor_sem);
2839 2829
2840 nilfs_dispose_list(sbi, &garbage_list, 1); 2830 nilfs_dispose_list(nilfs, &garbage_list, 1);
2841} 2831}
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index cd8056e7cbed..6c02a86745fb 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -27,7 +27,7 @@
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h> 29#include <linux/nilfs2_fs.h>
30#include "sb.h" 30#include "nilfs.h"
31 31
32struct nilfs_root; 32struct nilfs_root;
33 33
@@ -88,7 +88,6 @@ struct nilfs_segsum_pointer {
88/** 88/**
89 * struct nilfs_sc_info - Segment constructor information 89 * struct nilfs_sc_info - Segment constructor information
90 * @sc_super: Back pointer to super_block struct 90 * @sc_super: Back pointer to super_block struct
91 * @sc_sbi: Back pointer to nilfs_sb_info struct
92 * @sc_root: root object of the current filesystem tree 91 * @sc_root: root object of the current filesystem tree
93 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
94 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
@@ -131,7 +130,6 @@ struct nilfs_segsum_pointer {
131 */ 130 */
132struct nilfs_sc_info { 131struct nilfs_sc_info {
133 struct super_block *sc_super; 132 struct super_block *sc_super;
134 struct nilfs_sb_info *sc_sbi;
135 struct nilfs_root *sc_root; 133 struct nilfs_root *sc_root;
136 134
137 unsigned long sc_nblk_inc; 135 unsigned long sc_nblk_inc;
@@ -235,18 +233,16 @@ extern void nilfs_flush_segment(struct super_block *, ino_t);
235extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, 233extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
236 void **); 234 void **);
237 235
238int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, 236int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root);
239 struct nilfs_root *root); 237void nilfs_detach_log_writer(struct super_block *sb);
240extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
241 238
242/* recovery.c */ 239/* recovery.c */
243extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t, 240extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
244 struct buffer_head **, int); 241 struct buffer_head **, int);
245extern int nilfs_search_super_root(struct the_nilfs *, 242extern int nilfs_search_super_root(struct the_nilfs *,
246 struct nilfs_recovery_info *); 243 struct nilfs_recovery_info *);
247extern int nilfs_salvage_orphan_logs(struct the_nilfs *, 244int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb,
248 struct nilfs_sb_info *, 245 struct nilfs_recovery_info *ri);
249 struct nilfs_recovery_info *);
250extern void nilfs_dispose_segment_list(struct list_head *); 246extern void nilfs_dispose_segment_list(struct list_head *);
251 247
252#endif /* _NILFS_SEGMENT_H */ 248#endif /* _NILFS_SEGMENT_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1673b3d99842..062cca065195 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -43,7 +43,6 @@
43#include <linux/init.h> 43#include <linux/init.h>
44#include <linux/blkdev.h> 44#include <linux/blkdev.h>
45#include <linux/parser.h> 45#include <linux/parser.h>
46#include <linux/random.h>
47#include <linux/crc32.h> 46#include <linux/crc32.h>
48#include <linux/vfs.h> 47#include <linux/vfs.h>
49#include <linux/writeback.h> 48#include <linux/writeback.h>
@@ -72,23 +71,23 @@ struct kmem_cache *nilfs_transaction_cachep;
72struct kmem_cache *nilfs_segbuf_cachep; 71struct kmem_cache *nilfs_segbuf_cachep;
73struct kmem_cache *nilfs_btree_path_cache; 72struct kmem_cache *nilfs_btree_path_cache;
74 73
75static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount); 74static int nilfs_setup_super(struct super_block *sb, int is_mount);
76static int nilfs_remount(struct super_block *sb, int *flags, char *data); 75static int nilfs_remount(struct super_block *sb, int *flags, char *data);
77 76
78static void nilfs_set_error(struct nilfs_sb_info *sbi) 77static void nilfs_set_error(struct super_block *sb)
79{ 78{
80 struct the_nilfs *nilfs = sbi->s_nilfs; 79 struct the_nilfs *nilfs = sb->s_fs_info;
81 struct nilfs_super_block **sbp; 80 struct nilfs_super_block **sbp;
82 81
83 down_write(&nilfs->ns_sem); 82 down_write(&nilfs->ns_sem);
84 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { 83 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
85 nilfs->ns_mount_state |= NILFS_ERROR_FS; 84 nilfs->ns_mount_state |= NILFS_ERROR_FS;
86 sbp = nilfs_prepare_super(sbi, 0); 85 sbp = nilfs_prepare_super(sb, 0);
87 if (likely(sbp)) { 86 if (likely(sbp)) {
88 sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); 87 sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
89 if (sbp[1]) 88 if (sbp[1])
90 sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); 89 sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
91 nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 90 nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
92 } 91 }
93 } 92 }
94 up_write(&nilfs->ns_sem); 93 up_write(&nilfs->ns_sem);
@@ -109,7 +108,7 @@ static void nilfs_set_error(struct nilfs_sb_info *sbi)
109void nilfs_error(struct super_block *sb, const char *function, 108void nilfs_error(struct super_block *sb, const char *function,
110 const char *fmt, ...) 109 const char *fmt, ...)
111{ 110{
112 struct nilfs_sb_info *sbi = NILFS_SB(sb); 111 struct the_nilfs *nilfs = sb->s_fs_info;
113 struct va_format vaf; 112 struct va_format vaf;
114 va_list args; 113 va_list args;
115 114
@@ -124,15 +123,15 @@ void nilfs_error(struct super_block *sb, const char *function,
124 va_end(args); 123 va_end(args);
125 124
126 if (!(sb->s_flags & MS_RDONLY)) { 125 if (!(sb->s_flags & MS_RDONLY)) {
127 nilfs_set_error(sbi); 126 nilfs_set_error(sb);
128 127
129 if (nilfs_test_opt(sbi, ERRORS_RO)) { 128 if (nilfs_test_opt(nilfs, ERRORS_RO)) {
130 printk(KERN_CRIT "Remounting filesystem read-only\n"); 129 printk(KERN_CRIT "Remounting filesystem read-only\n");
131 sb->s_flags |= MS_RDONLY; 130 sb->s_flags |= MS_RDONLY;
132 } 131 }
133 } 132 }
134 133
135 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 134 if (nilfs_test_opt(nilfs, ERRORS_PANIC))
136 panic("NILFS (device %s): panic forced after error\n", 135 panic("NILFS (device %s): panic forced after error\n",
137 sb->s_id); 136 sb->s_id);
138} 137}
@@ -189,14 +188,14 @@ void nilfs_destroy_inode(struct inode *inode)
189 call_rcu(&inode->i_rcu, nilfs_i_callback); 188 call_rcu(&inode->i_rcu, nilfs_i_callback);
190} 189}
191 190
192static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) 191static int nilfs_sync_super(struct super_block *sb, int flag)
193{ 192{
194 struct the_nilfs *nilfs = sbi->s_nilfs; 193 struct the_nilfs *nilfs = sb->s_fs_info;
195 int err; 194 int err;
196 195
197 retry: 196 retry:
198 set_buffer_dirty(nilfs->ns_sbh[0]); 197 set_buffer_dirty(nilfs->ns_sbh[0]);
199 if (nilfs_test_opt(sbi, BARRIER)) { 198 if (nilfs_test_opt(nilfs, BARRIER)) {
200 err = __sync_dirty_buffer(nilfs->ns_sbh[0], 199 err = __sync_dirty_buffer(nilfs->ns_sbh[0],
201 WRITE_SYNC | WRITE_FLUSH_FUA); 200 WRITE_SYNC | WRITE_FLUSH_FUA);
202 } else { 201 } else {
@@ -263,10 +262,10 @@ void nilfs_set_log_cursor(struct nilfs_super_block *sbp,
263 spin_unlock(&nilfs->ns_last_segment_lock); 262 spin_unlock(&nilfs->ns_last_segment_lock);
264} 263}
265 264
266struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, 265struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
267 int flip) 266 int flip)
268{ 267{
269 struct the_nilfs *nilfs = sbi->s_nilfs; 268 struct the_nilfs *nilfs = sb->s_fs_info;
270 struct nilfs_super_block **sbp = nilfs->ns_sbp; 269 struct nilfs_super_block **sbp = nilfs->ns_sbp;
271 270
272 /* nilfs->ns_sem must be locked by the caller. */ 271 /* nilfs->ns_sem must be locked by the caller. */
@@ -276,7 +275,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
276 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); 275 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
277 } else { 276 } else {
278 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", 277 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
279 sbi->s_super->s_id); 278 sb->s_id);
280 return NULL; 279 return NULL;
281 } 280 }
282 } else if (sbp[1] && 281 } else if (sbp[1] &&
@@ -290,9 +289,9 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
290 return sbp; 289 return sbp;
291} 290}
292 291
293int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag) 292int nilfs_commit_super(struct super_block *sb, int flag)
294{ 293{
295 struct the_nilfs *nilfs = sbi->s_nilfs; 294 struct the_nilfs *nilfs = sb->s_fs_info;
296 struct nilfs_super_block **sbp = nilfs->ns_sbp; 295 struct nilfs_super_block **sbp = nilfs->ns_sbp;
297 time_t t; 296 time_t t;
298 297
@@ -312,27 +311,28 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag)
312 nilfs->ns_sbsize)); 311 nilfs->ns_sbsize));
313 } 312 }
314 clear_nilfs_sb_dirty(nilfs); 313 clear_nilfs_sb_dirty(nilfs);
315 return nilfs_sync_super(sbi, flag); 314 return nilfs_sync_super(sb, flag);
316} 315}
317 316
318/** 317/**
319 * nilfs_cleanup_super() - write filesystem state for cleanup 318 * nilfs_cleanup_super() - write filesystem state for cleanup
320 * @sbi: nilfs_sb_info to be unmounted or degraded to read-only 319 * @sb: super block instance to be unmounted or degraded to read-only
321 * 320 *
322 * This function restores state flags in the on-disk super block. 321 * This function restores state flags in the on-disk super block.
323 * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the 322 * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the
324 * filesystem was not clean previously. 323 * filesystem was not clean previously.
325 */ 324 */
326int nilfs_cleanup_super(struct nilfs_sb_info *sbi) 325int nilfs_cleanup_super(struct super_block *sb)
327{ 326{
327 struct the_nilfs *nilfs = sb->s_fs_info;
328 struct nilfs_super_block **sbp; 328 struct nilfs_super_block **sbp;
329 int flag = NILFS_SB_COMMIT; 329 int flag = NILFS_SB_COMMIT;
330 int ret = -EIO; 330 int ret = -EIO;
331 331
332 sbp = nilfs_prepare_super(sbi, 0); 332 sbp = nilfs_prepare_super(sb, 0);
333 if (sbp) { 333 if (sbp) {
334 sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state); 334 sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
335 nilfs_set_log_cursor(sbp[0], sbi->s_nilfs); 335 nilfs_set_log_cursor(sbp[0], nilfs);
336 if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { 336 if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) {
337 /* 337 /*
338 * make the "clean" flag also to the opposite 338 * make the "clean" flag also to the opposite
@@ -342,21 +342,20 @@ int nilfs_cleanup_super(struct nilfs_sb_info *sbi)
342 sbp[1]->s_state = sbp[0]->s_state; 342 sbp[1]->s_state = sbp[0]->s_state;
343 flag = NILFS_SB_COMMIT_ALL; 343 flag = NILFS_SB_COMMIT_ALL;
344 } 344 }
345 ret = nilfs_commit_super(sbi, flag); 345 ret = nilfs_commit_super(sb, flag);
346 } 346 }
347 return ret; 347 return ret;
348} 348}
349 349
350static void nilfs_put_super(struct super_block *sb) 350static void nilfs_put_super(struct super_block *sb)
351{ 351{
352 struct nilfs_sb_info *sbi = NILFS_SB(sb); 352 struct the_nilfs *nilfs = sb->s_fs_info;
353 struct the_nilfs *nilfs = sbi->s_nilfs;
354 353
355 nilfs_detach_segment_constructor(sbi); 354 nilfs_detach_log_writer(sb);
356 355
357 if (!(sb->s_flags & MS_RDONLY)) { 356 if (!(sb->s_flags & MS_RDONLY)) {
358 down_write(&nilfs->ns_sem); 357 down_write(&nilfs->ns_sem);
359 nilfs_cleanup_super(sbi); 358 nilfs_cleanup_super(sb);
360 up_write(&nilfs->ns_sem); 359 up_write(&nilfs->ns_sem);
361 } 360 }
362 361
@@ -365,15 +364,12 @@ static void nilfs_put_super(struct super_block *sb)
365 iput(nilfs->ns_dat); 364 iput(nilfs->ns_dat);
366 365
367 destroy_nilfs(nilfs); 366 destroy_nilfs(nilfs);
368 sbi->s_super = NULL;
369 sb->s_fs_info = NULL; 367 sb->s_fs_info = NULL;
370 kfree(sbi);
371} 368}
372 369
373static int nilfs_sync_fs(struct super_block *sb, int wait) 370static int nilfs_sync_fs(struct super_block *sb, int wait)
374{ 371{
375 struct nilfs_sb_info *sbi = NILFS_SB(sb); 372 struct the_nilfs *nilfs = sb->s_fs_info;
376 struct the_nilfs *nilfs = sbi->s_nilfs;
377 struct nilfs_super_block **sbp; 373 struct nilfs_super_block **sbp;
378 int err = 0; 374 int err = 0;
379 375
@@ -383,10 +379,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
383 379
384 down_write(&nilfs->ns_sem); 380 down_write(&nilfs->ns_sem);
385 if (nilfs_sb_dirty(nilfs)) { 381 if (nilfs_sb_dirty(nilfs)) {
386 sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs)); 382 sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs));
387 if (likely(sbp)) { 383 if (likely(sbp)) {
388 nilfs_set_log_cursor(sbp[0], nilfs); 384 nilfs_set_log_cursor(sbp[0], nilfs);
389 nilfs_commit_super(sbi, NILFS_SB_COMMIT); 385 nilfs_commit_super(sb, NILFS_SB_COMMIT);
390 } 386 }
391 } 387 }
392 up_write(&nilfs->ns_sem); 388 up_write(&nilfs->ns_sem);
@@ -394,10 +390,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
394 return err; 390 return err;
395} 391}
396 392
397int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, 393int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
398 struct nilfs_root **rootp) 394 struct nilfs_root **rootp)
399{ 395{
400 struct the_nilfs *nilfs = sbi->s_nilfs; 396 struct the_nilfs *nilfs = sb->s_fs_info;
401 struct nilfs_root *root; 397 struct nilfs_root *root;
402 struct nilfs_checkpoint *raw_cp; 398 struct nilfs_checkpoint *raw_cp;
403 struct buffer_head *bh_cp; 399 struct buffer_head *bh_cp;
@@ -426,7 +422,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt,
426 goto failed; 422 goto failed;
427 } 423 }
428 424
429 err = nilfs_ifile_read(sbi->s_super, root, nilfs->ns_inode_size, 425 err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
430 &raw_cp->cp_ifile_inode, &root->ifile); 426 &raw_cp->cp_ifile_inode, &root->ifile);
431 if (err) 427 if (err)
432 goto failed_bh; 428 goto failed_bh;
@@ -450,8 +446,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt,
450 446
451static int nilfs_freeze(struct super_block *sb) 447static int nilfs_freeze(struct super_block *sb)
452{ 448{
453 struct nilfs_sb_info *sbi = NILFS_SB(sb); 449 struct the_nilfs *nilfs = sb->s_fs_info;
454 struct the_nilfs *nilfs = sbi->s_nilfs;
455 int err; 450 int err;
456 451
457 if (sb->s_flags & MS_RDONLY) 452 if (sb->s_flags & MS_RDONLY)
@@ -459,21 +454,20 @@ static int nilfs_freeze(struct super_block *sb)
459 454
460 /* Mark super block clean */ 455 /* Mark super block clean */
461 down_write(&nilfs->ns_sem); 456 down_write(&nilfs->ns_sem);
462 err = nilfs_cleanup_super(sbi); 457 err = nilfs_cleanup_super(sb);
463 up_write(&nilfs->ns_sem); 458 up_write(&nilfs->ns_sem);
464 return err; 459 return err;
465} 460}
466 461
467static int nilfs_unfreeze(struct super_block *sb) 462static int nilfs_unfreeze(struct super_block *sb)
468{ 463{
469 struct nilfs_sb_info *sbi = NILFS_SB(sb); 464 struct the_nilfs *nilfs = sb->s_fs_info;
470 struct the_nilfs *nilfs = sbi->s_nilfs;
471 465
472 if (sb->s_flags & MS_RDONLY) 466 if (sb->s_flags & MS_RDONLY)
473 return 0; 467 return 0;
474 468
475 down_write(&nilfs->ns_sem); 469 down_write(&nilfs->ns_sem);
476 nilfs_setup_super(sbi, false); 470 nilfs_setup_super(sb, false);
477 up_write(&nilfs->ns_sem); 471 up_write(&nilfs->ns_sem);
478 return 0; 472 return 0;
479} 473}
@@ -530,22 +524,22 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
530static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 524static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
531{ 525{
532 struct super_block *sb = vfs->mnt_sb; 526 struct super_block *sb = vfs->mnt_sb;
533 struct nilfs_sb_info *sbi = NILFS_SB(sb); 527 struct the_nilfs *nilfs = sb->s_fs_info;
534 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; 528 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root;
535 529
536 if (!nilfs_test_opt(sbi, BARRIER)) 530 if (!nilfs_test_opt(nilfs, BARRIER))
537 seq_puts(seq, ",nobarrier"); 531 seq_puts(seq, ",nobarrier");
538 if (root->cno != NILFS_CPTREE_CURRENT_CNO) 532 if (root->cno != NILFS_CPTREE_CURRENT_CNO)
539 seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); 533 seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno);
540 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 534 if (nilfs_test_opt(nilfs, ERRORS_PANIC))
541 seq_puts(seq, ",errors=panic"); 535 seq_puts(seq, ",errors=panic");
542 if (nilfs_test_opt(sbi, ERRORS_CONT)) 536 if (nilfs_test_opt(nilfs, ERRORS_CONT))
543 seq_puts(seq, ",errors=continue"); 537 seq_puts(seq, ",errors=continue");
544 if (nilfs_test_opt(sbi, STRICT_ORDER)) 538 if (nilfs_test_opt(nilfs, STRICT_ORDER))
545 seq_puts(seq, ",order=strict"); 539 seq_puts(seq, ",order=strict");
546 if (nilfs_test_opt(sbi, NORECOVERY)) 540 if (nilfs_test_opt(nilfs, NORECOVERY))
547 seq_puts(seq, ",norecovery"); 541 seq_puts(seq, ",norecovery");
548 if (nilfs_test_opt(sbi, DISCARD)) 542 if (nilfs_test_opt(nilfs, DISCARD))
549 seq_puts(seq, ",discard"); 543 seq_puts(seq, ",discard");
550 544
551 return 0; 545 return 0;
@@ -594,7 +588,7 @@ static match_table_t tokens = {
594 588
595static int parse_options(char *options, struct super_block *sb, int is_remount) 589static int parse_options(char *options, struct super_block *sb, int is_remount)
596{ 590{
597 struct nilfs_sb_info *sbi = NILFS_SB(sb); 591 struct the_nilfs *nilfs = sb->s_fs_info;
598 char *p; 592 char *p;
599 substring_t args[MAX_OPT_ARGS]; 593 substring_t args[MAX_OPT_ARGS];
600 594
@@ -609,29 +603,29 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
609 token = match_token(p, tokens, args); 603 token = match_token(p, tokens, args);
610 switch (token) { 604 switch (token) {
611 case Opt_barrier: 605 case Opt_barrier:
612 nilfs_set_opt(sbi, BARRIER); 606 nilfs_set_opt(nilfs, BARRIER);
613 break; 607 break;
614 case Opt_nobarrier: 608 case Opt_nobarrier:
615 nilfs_clear_opt(sbi, BARRIER); 609 nilfs_clear_opt(nilfs, BARRIER);
616 break; 610 break;
617 case Opt_order: 611 case Opt_order:
618 if (strcmp(args[0].from, "relaxed") == 0) 612 if (strcmp(args[0].from, "relaxed") == 0)
619 /* Ordered data semantics */ 613 /* Ordered data semantics */
620 nilfs_clear_opt(sbi, STRICT_ORDER); 614 nilfs_clear_opt(nilfs, STRICT_ORDER);
621 else if (strcmp(args[0].from, "strict") == 0) 615 else if (strcmp(args[0].from, "strict") == 0)
622 /* Strict in-order semantics */ 616 /* Strict in-order semantics */
623 nilfs_set_opt(sbi, STRICT_ORDER); 617 nilfs_set_opt(nilfs, STRICT_ORDER);
624 else 618 else
625 return 0; 619 return 0;
626 break; 620 break;
627 case Opt_err_panic: 621 case Opt_err_panic:
628 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); 622 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
629 break; 623 break;
630 case Opt_err_ro: 624 case Opt_err_ro:
631 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); 625 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
632 break; 626 break;
633 case Opt_err_cont: 627 case Opt_err_cont:
634 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); 628 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
635 break; 629 break;
636 case Opt_snapshot: 630 case Opt_snapshot:
637 if (is_remount) { 631 if (is_remount) {
@@ -642,13 +636,13 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
642 } 636 }
643 break; 637 break;
644 case Opt_norecovery: 638 case Opt_norecovery:
645 nilfs_set_opt(sbi, NORECOVERY); 639 nilfs_set_opt(nilfs, NORECOVERY);
646 break; 640 break;
647 case Opt_discard: 641 case Opt_discard:
648 nilfs_set_opt(sbi, DISCARD); 642 nilfs_set_opt(nilfs, DISCARD);
649 break; 643 break;
650 case Opt_nodiscard: 644 case Opt_nodiscard:
651 nilfs_clear_opt(sbi, DISCARD); 645 nilfs_clear_opt(nilfs, DISCARD);
652 break; 646 break;
653 default: 647 default:
654 printk(KERN_ERR 648 printk(KERN_ERR
@@ -660,22 +654,24 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
660} 654}
661 655
662static inline void 656static inline void
663nilfs_set_default_options(struct nilfs_sb_info *sbi, 657nilfs_set_default_options(struct super_block *sb,
664 struct nilfs_super_block *sbp) 658 struct nilfs_super_block *sbp)
665{ 659{
666 sbi->s_mount_opt = 660 struct the_nilfs *nilfs = sb->s_fs_info;
661
662 nilfs->ns_mount_opt =
667 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; 663 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
668} 664}
669 665
670static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount) 666static int nilfs_setup_super(struct super_block *sb, int is_mount)
671{ 667{
672 struct the_nilfs *nilfs = sbi->s_nilfs; 668 struct the_nilfs *nilfs = sb->s_fs_info;
673 struct nilfs_super_block **sbp; 669 struct nilfs_super_block **sbp;
674 int max_mnt_count; 670 int max_mnt_count;
675 int mnt_count; 671 int mnt_count;
676 672
677 /* nilfs->ns_sem must be locked by the caller. */ 673 /* nilfs->ns_sem must be locked by the caller. */
678 sbp = nilfs_prepare_super(sbi, 0); 674 sbp = nilfs_prepare_super(sb, 0);
679 if (!sbp) 675 if (!sbp)
680 return -EIO; 676 return -EIO;
681 677
@@ -706,7 +702,7 @@ skip_mount_setup:
706 /* synchronize sbp[1] with sbp[0] */ 702 /* synchronize sbp[1] with sbp[0] */
707 if (sbp[1]) 703 if (sbp[1])
708 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); 704 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
709 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 705 return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
710} 706}
711 707
712struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, 708struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
@@ -727,7 +723,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,
727 struct nilfs_super_block *sbp, 723 struct nilfs_super_block *sbp,
728 char *data) 724 char *data)
729{ 725{
730 struct nilfs_sb_info *sbi = NILFS_SB(sb); 726 struct the_nilfs *nilfs = sb->s_fs_info;
731 727
732 sb->s_magic = le16_to_cpu(sbp->s_magic); 728 sb->s_magic = le16_to_cpu(sbp->s_magic);
733 729
@@ -736,12 +732,12 @@ int nilfs_store_magic_and_option(struct super_block *sb,
736 sb->s_flags |= MS_NOATIME; 732 sb->s_flags |= MS_NOATIME;
737#endif 733#endif
738 734
739 nilfs_set_default_options(sbi, sbp); 735 nilfs_set_default_options(sb, sbp);
740 736
741 sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); 737 nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
742 sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); 738 nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
743 sbi->s_interval = le32_to_cpu(sbp->s_c_interval); 739 nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
744 sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); 740 nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
745 741
746 return !parse_options(data, sb, 0) ? -EINVAL : 0 ; 742 return !parse_options(data, sb, 0) ? -EINVAL : 0 ;
747} 743}
@@ -822,7 +818,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
822static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, 818static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
823 struct dentry **root_dentry) 819 struct dentry **root_dentry)
824{ 820{
825 struct the_nilfs *nilfs = NILFS_SB(s)->s_nilfs; 821 struct the_nilfs *nilfs = s->s_fs_info;
826 struct nilfs_root *root; 822 struct nilfs_root *root;
827 int ret; 823 int ret;
828 824
@@ -840,7 +836,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
840 goto out; 836 goto out;
841 } 837 }
842 838
843 ret = nilfs_attach_checkpoint(NILFS_SB(s), cno, false, &root); 839 ret = nilfs_attach_checkpoint(s, cno, false, &root);
844 if (ret) { 840 if (ret) {
845 printk(KERN_ERR "NILFS: error loading snapshot " 841 printk(KERN_ERR "NILFS: error loading snapshot "
846 "(checkpoint number=%llu).\n", 842 "(checkpoint number=%llu).\n",
@@ -874,7 +870,7 @@ static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
874 870
875int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) 871int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
876{ 872{
877 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; 873 struct the_nilfs *nilfs = sb->s_fs_info;
878 struct nilfs_root *root; 874 struct nilfs_root *root;
879 struct inode *inode; 875 struct inode *inode;
880 struct dentry *dentry; 876 struct dentry *dentry;
@@ -887,7 +883,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
887 return true; /* protect recent checkpoints */ 883 return true; /* protect recent checkpoints */
888 884
889 ret = false; 885 ret = false;
890 root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); 886 root = nilfs_lookup_root(nilfs, cno);
891 if (root) { 887 if (root) {
892 inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); 888 inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO);
893 if (inode) { 889 if (inode) {
@@ -917,43 +913,21 @@ static int
917nilfs_fill_super(struct super_block *sb, void *data, int silent) 913nilfs_fill_super(struct super_block *sb, void *data, int silent)
918{ 914{
919 struct the_nilfs *nilfs; 915 struct the_nilfs *nilfs;
920 struct nilfs_sb_info *sbi;
921 struct nilfs_root *fsroot; 916 struct nilfs_root *fsroot;
922 struct backing_dev_info *bdi; 917 struct backing_dev_info *bdi;
923 __u64 cno; 918 __u64 cno;
924 int err; 919 int err;
925 920
926 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 921 nilfs = alloc_nilfs(sb->s_bdev);
927 if (!sbi) 922 if (!nilfs)
928 return -ENOMEM; 923 return -ENOMEM;
929 924
930 sb->s_fs_info = sbi; 925 sb->s_fs_info = nilfs;
931 sbi->s_super = sb;
932
933 nilfs = alloc_nilfs(sb->s_bdev);
934 if (!nilfs) {
935 err = -ENOMEM;
936 goto failed_sbi;
937 }
938 sbi->s_nilfs = nilfs;
939 926
940 err = init_nilfs(nilfs, sbi, (char *)data); 927 err = init_nilfs(nilfs, sb, (char *)data);
941 if (err) 928 if (err)
942 goto failed_nilfs; 929 goto failed_nilfs;
943 930
944 spin_lock_init(&sbi->s_inode_lock);
945 INIT_LIST_HEAD(&sbi->s_dirty_files);
946
947 /*
948 * Following initialization is overlapped because
949 * nilfs_sb_info structure has been cleared at the beginning.
950 * But we reserve them to keep our interest and make ready
951 * for the future change.
952 */
953 get_random_bytes(&sbi->s_next_generation,
954 sizeof(sbi->s_next_generation));
955 spin_lock_init(&sbi->s_next_gen_lock);
956
957 sb->s_op = &nilfs_sops; 931 sb->s_op = &nilfs_sops;
958 sb->s_export_op = &nilfs_export_ops; 932 sb->s_export_op = &nilfs_export_ops;
959 sb->s_root = NULL; 933 sb->s_root = NULL;
@@ -962,12 +936,12 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
962 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 936 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
963 sb->s_bdi = bdi ? : &default_backing_dev_info; 937 sb->s_bdi = bdi ? : &default_backing_dev_info;
964 938
965 err = load_nilfs(nilfs, sbi); 939 err = load_nilfs(nilfs, sb);
966 if (err) 940 if (err)
967 goto failed_nilfs; 941 goto failed_nilfs;
968 942
969 cno = nilfs_last_cno(nilfs); 943 cno = nilfs_last_cno(nilfs);
970 err = nilfs_attach_checkpoint(sbi, cno, true, &fsroot); 944 err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
971 if (err) { 945 if (err) {
972 printk(KERN_ERR "NILFS: error loading last checkpoint " 946 printk(KERN_ERR "NILFS: error loading last checkpoint "
973 "(checkpoint number=%llu).\n", (unsigned long long)cno); 947 "(checkpoint number=%llu).\n", (unsigned long long)cno);
@@ -975,7 +949,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
975 } 949 }
976 950
977 if (!(sb->s_flags & MS_RDONLY)) { 951 if (!(sb->s_flags & MS_RDONLY)) {
978 err = nilfs_attach_segment_constructor(sbi, fsroot); 952 err = nilfs_attach_log_writer(sb, fsroot);
979 if (err) 953 if (err)
980 goto failed_checkpoint; 954 goto failed_checkpoint;
981 } 955 }
@@ -988,14 +962,14 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
988 962
989 if (!(sb->s_flags & MS_RDONLY)) { 963 if (!(sb->s_flags & MS_RDONLY)) {
990 down_write(&nilfs->ns_sem); 964 down_write(&nilfs->ns_sem);
991 nilfs_setup_super(sbi, true); 965 nilfs_setup_super(sb, true);
992 up_write(&nilfs->ns_sem); 966 up_write(&nilfs->ns_sem);
993 } 967 }
994 968
995 return 0; 969 return 0;
996 970
997 failed_segctor: 971 failed_segctor:
998 nilfs_detach_segment_constructor(sbi); 972 nilfs_detach_log_writer(sb);
999 973
1000 failed_checkpoint: 974 failed_checkpoint:
1001 nilfs_put_root(fsroot); 975 nilfs_put_root(fsroot);
@@ -1007,23 +981,18 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1007 981
1008 failed_nilfs: 982 failed_nilfs:
1009 destroy_nilfs(nilfs); 983 destroy_nilfs(nilfs);
1010
1011 failed_sbi:
1012 sb->s_fs_info = NULL;
1013 kfree(sbi);
1014 return err; 984 return err;
1015} 985}
1016 986
1017static int nilfs_remount(struct super_block *sb, int *flags, char *data) 987static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1018{ 988{
1019 struct nilfs_sb_info *sbi = NILFS_SB(sb); 989 struct the_nilfs *nilfs = sb->s_fs_info;
1020 struct the_nilfs *nilfs = sbi->s_nilfs;
1021 unsigned long old_sb_flags; 990 unsigned long old_sb_flags;
1022 unsigned long old_mount_opt; 991 unsigned long old_mount_opt;
1023 int err; 992 int err;
1024 993
1025 old_sb_flags = sb->s_flags; 994 old_sb_flags = sb->s_flags;
1026 old_mount_opt = sbi->s_mount_opt; 995 old_mount_opt = nilfs->ns_mount_opt;
1027 996
1028 if (!parse_options(data, sb, 1)) { 997 if (!parse_options(data, sb, 1)) {
1029 err = -EINVAL; 998 err = -EINVAL;
@@ -1043,8 +1012,8 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1043 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1012 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1044 goto out; 1013 goto out;
1045 if (*flags & MS_RDONLY) { 1014 if (*flags & MS_RDONLY) {
1046 /* Shutting down the segment constructor */ 1015 /* Shutting down log writer */
1047 nilfs_detach_segment_constructor(sbi); 1016 nilfs_detach_log_writer(sb);
1048 sb->s_flags |= MS_RDONLY; 1017 sb->s_flags |= MS_RDONLY;
1049 1018
1050 /* 1019 /*
@@ -1052,7 +1021,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1052 * the RDONLY flag and then mark the partition as valid again. 1021 * the RDONLY flag and then mark the partition as valid again.
1053 */ 1022 */
1054 down_write(&nilfs->ns_sem); 1023 down_write(&nilfs->ns_sem);
1055 nilfs_cleanup_super(sbi); 1024 nilfs_cleanup_super(sb);
1056 up_write(&nilfs->ns_sem); 1025 up_write(&nilfs->ns_sem);
1057 } else { 1026 } else {
1058 __u64 features; 1027 __u64 features;
@@ -1079,12 +1048,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1079 sb->s_flags &= ~MS_RDONLY; 1048 sb->s_flags &= ~MS_RDONLY;
1080 1049
1081 root = NILFS_I(sb->s_root->d_inode)->i_root; 1050 root = NILFS_I(sb->s_root->d_inode)->i_root;
1082 err = nilfs_attach_segment_constructor(sbi, root); 1051 err = nilfs_attach_log_writer(sb, root);
1083 if (err) 1052 if (err)
1084 goto restore_opts; 1053 goto restore_opts;
1085 1054
1086 down_write(&nilfs->ns_sem); 1055 down_write(&nilfs->ns_sem);
1087 nilfs_setup_super(sbi, true); 1056 nilfs_setup_super(sb, true);
1088 up_write(&nilfs->ns_sem); 1057 up_write(&nilfs->ns_sem);
1089 } 1058 }
1090 out: 1059 out:
@@ -1092,13 +1061,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1092 1061
1093 restore_opts: 1062 restore_opts:
1094 sb->s_flags = old_sb_flags; 1063 sb->s_flags = old_sb_flags;
1095 sbi->s_mount_opt = old_mount_opt; 1064 nilfs->ns_mount_opt = old_mount_opt;
1096 return err; 1065 return err;
1097} 1066}
1098 1067
1099struct nilfs_super_data { 1068struct nilfs_super_data {
1100 struct block_device *bdev; 1069 struct block_device *bdev;
1101 struct nilfs_sb_info *sbi;
1102 __u64 cno; 1070 __u64 cno;
1103 int flags; 1071 int flags;
1104}; 1072};
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index ad4ac607cf57..d2acd1a651f3 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -25,6 +25,7 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/random.h>
28#include <linux/crc32.h> 29#include <linux/crc32.h>
29#include "nilfs.h" 30#include "nilfs.h"
30#include "segment.h" 31#include "segment.h"
@@ -75,7 +76,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
75 nilfs->ns_bdev = bdev; 76 nilfs->ns_bdev = bdev;
76 atomic_set(&nilfs->ns_ndirtyblks, 0); 77 atomic_set(&nilfs->ns_ndirtyblks, 0);
77 init_rwsem(&nilfs->ns_sem); 78 init_rwsem(&nilfs->ns_sem);
79 INIT_LIST_HEAD(&nilfs->ns_dirty_files);
78 INIT_LIST_HEAD(&nilfs->ns_gc_inodes); 80 INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
81 spin_lock_init(&nilfs->ns_inode_lock);
82 spin_lock_init(&nilfs->ns_next_gen_lock);
79 spin_lock_init(&nilfs->ns_last_segment_lock); 83 spin_lock_init(&nilfs->ns_last_segment_lock);
80 nilfs->ns_cptree = RB_ROOT; 84 nilfs->ns_cptree = RB_ROOT;
81 spin_lock_init(&nilfs->ns_cptree_lock); 85 spin_lock_init(&nilfs->ns_cptree_lock);
@@ -197,16 +201,16 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
197/** 201/**
198 * load_nilfs - load and recover the nilfs 202 * load_nilfs - load and recover the nilfs
199 * @nilfs: the_nilfs structure to be released 203 * @nilfs: the_nilfs structure to be released
200 * @sbi: nilfs_sb_info used to recover past segment 204 * @sb: super block isntance used to recover past segment
201 * 205 *
202 * load_nilfs() searches and load the latest super root, 206 * load_nilfs() searches and load the latest super root,
203 * attaches the last segment, and does recovery if needed. 207 * attaches the last segment, and does recovery if needed.
204 * The caller must call this exclusively for simultaneous mounts. 208 * The caller must call this exclusively for simultaneous mounts.
205 */ 209 */
206int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) 210int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
207{ 211{
208 struct nilfs_recovery_info ri; 212 struct nilfs_recovery_info ri;
209 unsigned int s_flags = sbi->s_super->s_flags; 213 unsigned int s_flags = sb->s_flags;
210 int really_read_only = bdev_read_only(nilfs->ns_bdev); 214 int really_read_only = bdev_read_only(nilfs->ns_bdev);
211 int valid_fs = nilfs_valid_fs(nilfs); 215 int valid_fs = nilfs_valid_fs(nilfs);
212 int err; 216 int err;
@@ -271,7 +275,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
271 goto scan_error; 275 goto scan_error;
272 } 276 }
273 277
274 err = nilfs_load_super_root(nilfs, sbi->s_super, ri.ri_super_root); 278 err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root);
275 if (unlikely(err)) { 279 if (unlikely(err)) {
276 printk(KERN_ERR "NILFS: error loading super root.\n"); 280 printk(KERN_ERR "NILFS: error loading super root.\n");
277 goto failed; 281 goto failed;
@@ -283,7 +287,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
283 if (s_flags & MS_RDONLY) { 287 if (s_flags & MS_RDONLY) {
284 __u64 features; 288 __u64 features;
285 289
286 if (nilfs_test_opt(sbi, NORECOVERY)) { 290 if (nilfs_test_opt(nilfs, NORECOVERY)) {
287 printk(KERN_INFO "NILFS: norecovery option specified. " 291 printk(KERN_INFO "NILFS: norecovery option specified. "
288 "skipping roll-forward recovery\n"); 292 "skipping roll-forward recovery\n");
289 goto skip_recovery; 293 goto skip_recovery;
@@ -304,21 +308,21 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
304 err = -EROFS; 308 err = -EROFS;
305 goto failed_unload; 309 goto failed_unload;
306 } 310 }
307 sbi->s_super->s_flags &= ~MS_RDONLY; 311 sb->s_flags &= ~MS_RDONLY;
308 } else if (nilfs_test_opt(sbi, NORECOVERY)) { 312 } else if (nilfs_test_opt(nilfs, NORECOVERY)) {
309 printk(KERN_ERR "NILFS: recovery cancelled because norecovery " 313 printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
310 "option was specified for a read/write mount\n"); 314 "option was specified for a read/write mount\n");
311 err = -EINVAL; 315 err = -EINVAL;
312 goto failed_unload; 316 goto failed_unload;
313 } 317 }
314 318
315 err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri); 319 err = nilfs_salvage_orphan_logs(nilfs, sb, &ri);
316 if (err) 320 if (err)
317 goto failed_unload; 321 goto failed_unload;
318 322
319 down_write(&nilfs->ns_sem); 323 down_write(&nilfs->ns_sem);
320 nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */ 324 nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */
321 err = nilfs_cleanup_super(sbi); 325 err = nilfs_cleanup_super(sb);
322 up_write(&nilfs->ns_sem); 326 up_write(&nilfs->ns_sem);
323 327
324 if (err) { 328 if (err) {
@@ -330,7 +334,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
330 334
331 skip_recovery: 335 skip_recovery:
332 nilfs_clear_recovery_info(&ri); 336 nilfs_clear_recovery_info(&ri);
333 sbi->s_super->s_flags = s_flags; 337 sb->s_flags = s_flags;
334 return 0; 338 return 0;
335 339
336 scan_error: 340 scan_error:
@@ -344,7 +348,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
344 348
345 failed: 349 failed:
346 nilfs_clear_recovery_info(&ri); 350 nilfs_clear_recovery_info(&ri);
347 sbi->s_super->s_flags = s_flags; 351 sb->s_flags = s_flags;
348 return err; 352 return err;
349} 353}
350 354
@@ -475,10 +479,13 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
475 return -EIO; 479 return -EIO;
476 } 480 }
477 printk(KERN_WARNING 481 printk(KERN_WARNING
478 "NILFS warning: unable to read primary superblock\n"); 482 "NILFS warning: unable to read primary superblock "
479 } else if (!sbp[1]) 483 "(blocksize = %d)\n", blocksize);
484 } else if (!sbp[1]) {
480 printk(KERN_WARNING 485 printk(KERN_WARNING
481 "NILFS warning: unable to read secondary superblock\n"); 486 "NILFS warning: unable to read secondary superblock "
487 "(blocksize = %d)\n", blocksize);
488 }
482 489
483 /* 490 /*
484 * Compare two super blocks and set 1 in swp if the secondary 491 * Compare two super blocks and set 1 in swp if the secondary
@@ -505,7 +512,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
505 512
506 if (!valid[!swp]) 513 if (!valid[!swp])
507 printk(KERN_WARNING "NILFS warning: broken superblock. " 514 printk(KERN_WARNING "NILFS warning: broken superblock. "
508 "using spare superblock.\n"); 515 "using spare superblock (blocksize = %d).\n", blocksize);
509 if (swp) 516 if (swp)
510 nilfs_swap_super_block(nilfs); 517 nilfs_swap_super_block(nilfs);
511 518
@@ -519,7 +526,6 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
519/** 526/**
520 * init_nilfs - initialize a NILFS instance. 527 * init_nilfs - initialize a NILFS instance.
521 * @nilfs: the_nilfs structure 528 * @nilfs: the_nilfs structure
522 * @sbi: nilfs_sb_info
523 * @sb: super block 529 * @sb: super block
524 * @data: mount options 530 * @data: mount options
525 * 531 *
@@ -530,9 +536,8 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
530 * Return Value: On success, 0 is returned. On error, a negative error 536 * Return Value: On success, 0 is returned. On error, a negative error
531 * code is returned. 537 * code is returned.
532 */ 538 */
533int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) 539int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
534{ 540{
535 struct super_block *sb = sbi->s_super;
536 struct nilfs_super_block *sbp; 541 struct nilfs_super_block *sbp;
537 int blocksize; 542 int blocksize;
538 int err; 543 int err;
@@ -588,6 +593,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
588 nilfs->ns_blocksize_bits = sb->s_blocksize_bits; 593 nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
589 nilfs->ns_blocksize = blocksize; 594 nilfs->ns_blocksize = blocksize;
590 595
596 get_random_bytes(&nilfs->ns_next_generation,
597 sizeof(nilfs->ns_next_generation));
598
591 err = nilfs_store_disk_layout(nilfs, sbp); 599 err = nilfs_store_disk_layout(nilfs, sbp);
592 if (err) 600 if (err)
593 goto failed_sbh; 601 goto failed_sbh;
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index fd85e4c05c6b..f4968145c2a3 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -31,7 +31,8 @@
31#include <linux/blkdev.h> 31#include <linux/blkdev.h>
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include "sb.h" 34
35struct nilfs_sc_info;
35 36
36/* the_nilfs struct */ 37/* the_nilfs struct */
37enum { 38enum {
@@ -65,13 +66,23 @@ enum {
65 * @ns_last_cno: checkpoint number of the latest segment 66 * @ns_last_cno: checkpoint number of the latest segment
66 * @ns_prot_seq: least sequence number of segments which must not be reclaimed 67 * @ns_prot_seq: least sequence number of segments which must not be reclaimed
67 * @ns_prev_seq: base sequence number used to decide if advance log cursor 68 * @ns_prev_seq: base sequence number used to decide if advance log cursor
68 * @ns_segctor_sem: segment constructor semaphore 69 * @ns_writer: log writer
70 * @ns_segctor_sem: semaphore protecting log write
69 * @ns_dat: DAT file inode 71 * @ns_dat: DAT file inode
70 * @ns_cpfile: checkpoint file inode 72 * @ns_cpfile: checkpoint file inode
71 * @ns_sufile: segusage file inode 73 * @ns_sufile: segusage file inode
72 * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) 74 * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root)
73 * @ns_cptree_lock: lock protecting @ns_cptree 75 * @ns_cptree_lock: lock protecting @ns_cptree
76 * @ns_dirty_files: list of dirty files
77 * @ns_inode_lock: lock protecting @ns_dirty_files
74 * @ns_gc_inodes: dummy inodes to keep live blocks 78 * @ns_gc_inodes: dummy inodes to keep live blocks
79 * @ns_next_generation: next generation number for inodes
80 * @ns_next_gen_lock: lock protecting @ns_next_generation
81 * @ns_mount_opt: mount options
82 * @ns_resuid: uid for reserved blocks
83 * @ns_resgid: gid for reserved blocks
84 * @ns_interval: checkpoint creation interval
85 * @ns_watermark: watermark for the number of dirty buffers
75 * @ns_blocksize_bits: bit length of block size 86 * @ns_blocksize_bits: bit length of block size
76 * @ns_blocksize: block size 87 * @ns_blocksize: block size
77 * @ns_nsegments: number of segments in filesystem 88 * @ns_nsegments: number of segments in filesystem
@@ -131,6 +142,7 @@ struct the_nilfs {
131 u64 ns_prot_seq; 142 u64 ns_prot_seq;
132 u64 ns_prev_seq; 143 u64 ns_prev_seq;
133 144
145 struct nilfs_sc_info *ns_writer;
134 struct rw_semaphore ns_segctor_sem; 146 struct rw_semaphore ns_segctor_sem;
135 147
136 /* 148 /*
@@ -145,9 +157,25 @@ struct the_nilfs {
145 struct rb_root ns_cptree; 157 struct rb_root ns_cptree;
146 spinlock_t ns_cptree_lock; 158 spinlock_t ns_cptree_lock;
147 159
160 /* Dirty inode list */
161 struct list_head ns_dirty_files;
162 spinlock_t ns_inode_lock;
163
148 /* GC inode list */ 164 /* GC inode list */
149 struct list_head ns_gc_inodes; 165 struct list_head ns_gc_inodes;
150 166
167 /* Inode allocator */
168 u32 ns_next_generation;
169 spinlock_t ns_next_gen_lock;
170
171 /* Mount options */
172 unsigned long ns_mount_opt;
173
174 uid_t ns_resuid;
175 gid_t ns_resgid;
176 unsigned long ns_interval;
177 unsigned long ns_watermark;
178
151 /* Disk layout information (static) */ 179 /* Disk layout information (static) */
152 unsigned int ns_blocksize_bits; 180 unsigned int ns_blocksize_bits;
153 unsigned int ns_blocksize; 181 unsigned int ns_blocksize;
@@ -180,6 +208,20 @@ THE_NILFS_FNS(DISCONTINUED, discontinued)
180THE_NILFS_FNS(GC_RUNNING, gc_running) 208THE_NILFS_FNS(GC_RUNNING, gc_running)
181THE_NILFS_FNS(SB_DIRTY, sb_dirty) 209THE_NILFS_FNS(SB_DIRTY, sb_dirty)
182 210
211/*
212 * Mount option operations
213 */
214#define nilfs_clear_opt(nilfs, opt) \
215 do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
216#define nilfs_set_opt(nilfs, opt) \
217 do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0)
218#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
219#define nilfs_write_opt(nilfs, mask, opt) \
220 do { (nilfs)->ns_mount_opt = \
221 (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \
222 NILFS_MOUNT_##opt); \
223 } while (0)
224
183/** 225/**
184 * struct nilfs_root - nilfs root object 226 * struct nilfs_root - nilfs root object
185 * @cno: checkpoint number 227 * @cno: checkpoint number
@@ -224,15 +266,14 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
224void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); 266void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
225struct the_nilfs *alloc_nilfs(struct block_device *bdev); 267struct the_nilfs *alloc_nilfs(struct block_device *bdev);
226void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
227int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
228int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
229int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
230int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
231struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
232struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, 274struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs,
233 __u64 cno); 275 __u64 cno);
234void nilfs_put_root(struct nilfs_root *root); 276void nilfs_put_root(struct nilfs_root *root);
235struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
236int nilfs_near_disk_full(struct the_nilfs *); 277int nilfs_near_disk_full(struct the_nilfs *);
237void nilfs_fall_back_super_block(struct the_nilfs *); 278void nilfs_fall_back_super_block(struct the_nilfs *);
238void nilfs_swap_super_block(struct the_nilfs *); 279void nilfs_swap_super_block(struct the_nilfs *);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4c29fcf557d1..07ea8d3e6ea2 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -22,13 +22,14 @@
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/writeback.h> /* for inode_lock */
26 25
27#include <asm/atomic.h> 26#include <asm/atomic.h>
28 27
29#include <linux/fsnotify_backend.h> 28#include <linux/fsnotify_backend.h>
30#include "fsnotify.h" 29#include "fsnotify.h"
31 30
31#include "../internal.h"
32
32/* 33/*
33 * Recalculate the mask of events relevant to a given inode locked. 34 * Recalculate the mask of events relevant to a given inode locked.
34 */ 35 */
@@ -237,15 +238,14 @@ out:
237 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. 238 * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
238 * @list: list of inodes being unmounted (sb->s_inodes) 239 * @list: list of inodes being unmounted (sb->s_inodes)
239 * 240 *
240 * Called with inode_lock held, protecting the unmounting super block's list 241 * Called during unmount with no locks held, so needs to be safe against
241 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. 242 * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
242 * We temporarily drop inode_lock, however, and CAN block.
243 */ 243 */
244void fsnotify_unmount_inodes(struct list_head *list) 244void fsnotify_unmount_inodes(struct list_head *list)
245{ 245{
246 struct inode *inode, *next_i, *need_iput = NULL; 246 struct inode *inode, *next_i, *need_iput = NULL;
247 247
248 spin_lock(&inode_lock); 248 spin_lock(&inode_sb_list_lock);
249 list_for_each_entry_safe(inode, next_i, list, i_sb_list) { 249 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
250 struct inode *need_iput_tmp; 250 struct inode *need_iput_tmp;
251 251
@@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
254 * I_WILL_FREE, or I_NEW which is fine because by that point 254 * I_WILL_FREE, or I_NEW which is fine because by that point
255 * the inode cannot have any associated watches. 255 * the inode cannot have any associated watches.
256 */ 256 */
257 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 257 spin_lock(&inode->i_lock);
258 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
259 spin_unlock(&inode->i_lock);
258 continue; 260 continue;
261 }
259 262
260 /* 263 /*
261 * If i_count is zero, the inode cannot have any watches and 264 * If i_count is zero, the inode cannot have any watches and
@@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list)
263 * evict all inodes with zero i_count from icache which is 266 * evict all inodes with zero i_count from icache which is
264 * unnecessarily violent and may in fact be illegal to do. 267 * unnecessarily violent and may in fact be illegal to do.
265 */ 268 */
266 if (!atomic_read(&inode->i_count)) 269 if (!atomic_read(&inode->i_count)) {
270 spin_unlock(&inode->i_lock);
267 continue; 271 continue;
272 }
268 273
269 need_iput_tmp = need_iput; 274 need_iput_tmp = need_iput;
270 need_iput = NULL; 275 need_iput = NULL;
@@ -274,22 +279,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
274 __iget(inode); 279 __iget(inode);
275 else 280 else
276 need_iput_tmp = NULL; 281 need_iput_tmp = NULL;
282 spin_unlock(&inode->i_lock);
277 283
278 /* In case the dropping of a reference would nuke next_i. */ 284 /* In case the dropping of a reference would nuke next_i. */
279 if ((&next_i->i_sb_list != list) && 285 if ((&next_i->i_sb_list != list) &&
280 atomic_read(&next_i->i_count) && 286 atomic_read(&next_i->i_count)) {
281 !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 287 spin_lock(&next_i->i_lock);
282 __iget(next_i); 288 if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
283 need_iput = next_i; 289 __iget(next_i);
290 need_iput = next_i;
291 }
292 spin_unlock(&next_i->i_lock);
284 } 293 }
285 294
286 /* 295 /*
287 * We can safely drop inode_lock here because we hold 296 * We can safely drop inode_sb_list_lock here because we hold
288 * references on both inode and next_i. Also no new inodes 297 * references on both inode and next_i. Also no new inodes
289 * will be added since the umount has begun. Finally, 298 * will be added since the umount has begun.
290 * iprune_mutex keeps shrink_icache_memory() away.
291 */ 299 */
292 spin_unlock(&inode_lock); 300 spin_unlock(&inode_sb_list_lock);
293 301
294 if (need_iput_tmp) 302 if (need_iput_tmp)
295 iput(need_iput_tmp); 303 iput(need_iput_tmp);
@@ -301,7 +309,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
301 309
302 iput(inode); 310 iput(inode);
303 311
304 spin_lock(&inode_lock); 312 spin_lock(&inode_sb_list_lock);
305 } 313 }
306 spin_unlock(&inode_lock); 314 spin_unlock(&inode_sb_list_lock);
307} 315}
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 325185e514bb..50c00856f730 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -91,7 +91,6 @@
91#include <linux/slab.h> 91#include <linux/slab.h>
92#include <linux/spinlock.h> 92#include <linux/spinlock.h>
93#include <linux/srcu.h> 93#include <linux/srcu.h>
94#include <linux/writeback.h> /* for inode_lock */
95 94
96#include <asm/atomic.h> 95#include <asm/atomic.h>
97 96
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 85eebff6d0d7..e86577d6c5c3 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -23,7 +23,6 @@
23#include <linux/mount.h> 23#include <linux/mount.h>
24#include <linux/mutex.h> 24#include <linux/mutex.h>
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/writeback.h> /* for inode_lock */
27 26
28#include <asm/atomic.h> 27#include <asm/atomic.h>
29 28
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 4ff028fcfd6e..30206b238433 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -2,18 +2,13 @@
2 2
3obj-$(CONFIG_NTFS_FS) += ntfs.o 3obj-$(CONFIG_NTFS_FS) += ntfs.o
4 4
5ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ 5ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.30\" 9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ccflags-y := -DNTFS_VERSION=\"2.1.30\"
12EXTRA_CFLAGS += -DDEBUG 12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
13endif 13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
14 14
15ifeq ($(CONFIG_NTFS_RW),y)
16EXTRA_CFLAGS += -DNTFS_RW
17
18ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
19endif
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index c3c2c7ac9020..0b1e885b8cf8 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1543,8 +1543,6 @@ err_out:
1543 */ 1543 */
1544const struct address_space_operations ntfs_aops = { 1544const struct address_space_operations ntfs_aops = {
1545 .readpage = ntfs_readpage, /* Fill page with data. */ 1545 .readpage = ntfs_readpage, /* Fill page with data. */
1546 .sync_page = block_sync_page, /* Currently, just unplugs the
1547 disk request queue. */
1548#ifdef NTFS_RW 1546#ifdef NTFS_RW
1549 .writepage = ntfs_writepage, /* Write dirty page to disk. */ 1547 .writepage = ntfs_writepage, /* Write dirty page to disk. */
1550#endif /* NTFS_RW */ 1548#endif /* NTFS_RW */
@@ -1560,8 +1558,6 @@ const struct address_space_operations ntfs_aops = {
1560 */ 1558 */
1561const struct address_space_operations ntfs_mst_aops = { 1559const struct address_space_operations ntfs_mst_aops = {
1562 .readpage = ntfs_readpage, /* Fill page with data. */ 1560 .readpage = ntfs_readpage, /* Fill page with data. */
1563 .sync_page = block_sync_page, /* Currently, just unplugs the
1564 disk request queue. */
1565#ifdef NTFS_RW 1561#ifdef NTFS_RW
1566 .writepage = ntfs_writepage, /* Write dirty page to disk. */ 1562 .writepage = ntfs_writepage, /* Write dirty page to disk. */
1567 .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty 1563 .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6551c7cbad92..ef9ed854255c 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -698,8 +698,7 @@ lock_retry_remap:
698 "uptodate! Unplugging the disk queue " 698 "uptodate! Unplugging the disk queue "
699 "and rescheduling."); 699 "and rescheduling.");
700 get_bh(tbh); 700 get_bh(tbh);
701 blk_run_address_space(mapping); 701 io_schedule();
702 schedule();
703 put_bh(tbh); 702 put_bh(tbh);
704 if (unlikely(!buffer_uptodate(tbh))) 703 if (unlikely(!buffer_uptodate(tbh)))
705 goto read_err; 704 goto read_err;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index a627ed82c0a3..0b56c6b7ec01 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -54,7 +54,7 @@
54 * 54 *
55 * Return 1 if the attributes match and 0 if not. 55 * Return 1 if the attributes match and 0 if not.
56 * 56 *
57 * NOTE: This function runs with the inode_lock spin lock held so it is not 57 * NOTE: This function runs with the inode->i_lock spin lock held so it is not
58 * allowed to sleep. 58 * allowed to sleep.
59 */ 59 */
60int ntfs_test_inode(struct inode *vi, ntfs_attr *na) 60int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
98 * 98 *
99 * Return 0 on success and -errno on error. 99 * Return 0 on success and -errno on error.
100 * 100 *
101 * NOTE: This function runs with the inode_lock spin lock held so it is not 101 * NOTE: This function runs with the inode->i_lock spin lock held so it is not
102 * allowed to sleep. (Hence the GFP_ATOMIC allocation.) 102 * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
103 */ 103 */
104static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) 104static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 07d9fd854350..d8a0313e99e6 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,6 +1,6 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES 3ccflags-y += -DCATCH_BH_JBD_RACES
4 4
5obj-$(CONFIG_OCFS2_FS) += \ 5obj-$(CONFIG_OCFS2_FS) += \
6 ocfs2.o \ 6 ocfs2.o \
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 704f6b1742f3..e913ad130fdd 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -24,7 +24,6 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/string.h> 25#include <linux/string.h>
26 26
27#define MLOG_MASK_PREFIX ML_INODE
28#include <cluster/masklog.h> 27#include <cluster/masklog.h>
29 28
30#include "ocfs2.h" 29#include "ocfs2.h"
@@ -497,7 +496,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
497 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 496 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
498 return -EOPNOTSUPP; 497 return -EOPNOTSUPP;
499 498
500 if (!is_owner_or_cap(inode)) 499 if (!inode_owner_or_capable(inode))
501 return -EPERM; 500 return -EPERM;
502 501
503 if (value) { 502 if (value) {
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e4984e259cb6..b27a0d86f8c5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -30,7 +30,6 @@
30#include <linux/swap.h> 30#include <linux/swap.h>
31#include <linux/quotaops.h> 31#include <linux/quotaops.h>
32 32
33#define MLOG_MASK_PREFIX ML_DISK_ALLOC
34#include <cluster/masklog.h> 33#include <cluster/masklog.h>
35 34
36#include "ocfs2.h" 35#include "ocfs2.h"
@@ -50,6 +49,7 @@
50#include "uptodate.h" 49#include "uptodate.h"
51#include "xattr.h" 50#include "xattr.h"
52#include "refcounttree.h" 51#include "refcounttree.h"
52#include "ocfs2_trace.h"
53 53
54#include "buffer_head_io.h" 54#include "buffer_head_io.h"
55 55
@@ -886,8 +886,7 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
886 struct ocfs2_extent_block *eb = 886 struct ocfs2_extent_block *eb =
887 (struct ocfs2_extent_block *)bh->b_data; 887 (struct ocfs2_extent_block *)bh->b_data;
888 888
889 mlog(0, "Validating extent block %llu\n", 889 trace_ocfs2_validate_extent_block((unsigned long long)bh->b_blocknr);
890 (unsigned long long)bh->b_blocknr);
891 890
892 BUG_ON(!buffer_uptodate(bh)); 891 BUG_ON(!buffer_uptodate(bh));
893 892
@@ -965,8 +964,6 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
965 struct buffer_head *eb_bh = NULL; 964 struct buffer_head *eb_bh = NULL;
966 u64 last_eb_blk = 0; 965 u64 last_eb_blk = 0;
967 966
968 mlog_entry_void();
969
970 el = et->et_root_el; 967 el = et->et_root_el;
971 last_eb_blk = ocfs2_et_get_last_eb_blk(et); 968 last_eb_blk = ocfs2_et_get_last_eb_blk(et);
972 969
@@ -987,7 +984,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
987bail: 984bail:
988 brelse(eb_bh); 985 brelse(eb_bh);
989 986
990 mlog_exit(retval); 987 trace_ocfs2_num_free_extents(retval);
991 return retval; 988 return retval;
992} 989}
993 990
@@ -1010,8 +1007,6 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1010 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); 1007 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
1011 struct ocfs2_extent_block *eb; 1008 struct ocfs2_extent_block *eb;
1012 1009
1013 mlog_entry_void();
1014
1015 count = 0; 1010 count = 0;
1016 while (count < wanted) { 1011 while (count < wanted) {
1017 status = ocfs2_claim_metadata(handle, 1012 status = ocfs2_claim_metadata(handle,
@@ -1074,8 +1069,8 @@ bail:
1074 brelse(bhs[i]); 1069 brelse(bhs[i]);
1075 bhs[i] = NULL; 1070 bhs[i] = NULL;
1076 } 1071 }
1072 mlog_errno(status);
1077 } 1073 }
1078 mlog_exit(status);
1079 return status; 1074 return status;
1080} 1075}
1081 1076
@@ -1173,8 +1168,6 @@ static int ocfs2_add_branch(handle_t *handle,
1173 struct ocfs2_extent_list *el; 1168 struct ocfs2_extent_list *el;
1174 u32 new_cpos, root_end; 1169 u32 new_cpos, root_end;
1175 1170
1176 mlog_entry_void();
1177
1178 BUG_ON(!last_eb_bh || !*last_eb_bh); 1171 BUG_ON(!last_eb_bh || !*last_eb_bh);
1179 1172
1180 if (eb_bh) { 1173 if (eb_bh) {
@@ -1200,8 +1193,11 @@ static int ocfs2_add_branch(handle_t *handle,
1200 * from new_cpos). 1193 * from new_cpos).
1201 */ 1194 */
1202 if (root_end > new_cpos) { 1195 if (root_end > new_cpos) {
1203 mlog(0, "adjust the cluster end from %u to %u\n", 1196 trace_ocfs2_adjust_rightmost_branch(
1204 root_end, new_cpos); 1197 (unsigned long long)
1198 ocfs2_metadata_cache_owner(et->et_ci),
1199 root_end, new_cpos);
1200
1205 status = ocfs2_adjust_rightmost_branch(handle, et); 1201 status = ocfs2_adjust_rightmost_branch(handle, et);
1206 if (status) { 1202 if (status) {
1207 mlog_errno(status); 1203 mlog_errno(status);
@@ -1332,7 +1328,6 @@ bail:
1332 kfree(new_eb_bhs); 1328 kfree(new_eb_bhs);
1333 } 1329 }
1334 1330
1335 mlog_exit(status);
1336 return status; 1331 return status;
1337} 1332}
1338 1333
@@ -1353,8 +1348,6 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1353 struct ocfs2_extent_list *root_el; 1348 struct ocfs2_extent_list *root_el;
1354 struct ocfs2_extent_list *eb_el; 1349 struct ocfs2_extent_list *eb_el;
1355 1350
1356 mlog_entry_void();
1357
1358 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, 1351 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
1359 &new_eb_bh); 1352 &new_eb_bh);
1360 if (status < 0) { 1353 if (status < 0) {
@@ -1415,7 +1408,6 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1415bail: 1408bail:
1416 brelse(new_eb_bh); 1409 brelse(new_eb_bh);
1417 1410
1418 mlog_exit(status);
1419 return status; 1411 return status;
1420} 1412}
1421 1413
@@ -1446,8 +1438,6 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
1446 struct buffer_head *bh = NULL; 1438 struct buffer_head *bh = NULL;
1447 struct buffer_head *lowest_bh = NULL; 1439 struct buffer_head *lowest_bh = NULL;
1448 1440
1449 mlog_entry_void();
1450
1451 *target_bh = NULL; 1441 *target_bh = NULL;
1452 1442
1453 el = et->et_root_el; 1443 el = et->et_root_el;
@@ -1503,7 +1493,6 @@ static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
1503bail: 1493bail:
1504 brelse(bh); 1494 brelse(bh);
1505 1495
1506 mlog_exit(status);
1507 return status; 1496 return status;
1508} 1497}
1509 1498
@@ -1540,7 +1529,10 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
1540 * another tree level */ 1529 * another tree level */
1541 if (shift) { 1530 if (shift) {
1542 BUG_ON(bh); 1531 BUG_ON(bh);
1543 mlog(0, "need to shift tree depth (current = %d)\n", depth); 1532 trace_ocfs2_grow_tree(
1533 (unsigned long long)
1534 ocfs2_metadata_cache_owner(et->et_ci),
1535 depth);
1544 1536
1545 /* ocfs2_shift_tree_depth will return us a buffer with 1537 /* ocfs2_shift_tree_depth will return us a buffer with
1546 * the new extent block (so we can pass that to 1538 * the new extent block (so we can pass that to
@@ -1570,7 +1562,6 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
1570 1562
1571 /* call ocfs2_add_branch to add the final part of the tree with 1563 /* call ocfs2_add_branch to add the final part of the tree with
1572 * the new data. */ 1564 * the new data. */
1573 mlog(0, "add branch. bh = %p\n", bh);
1574 ret = ocfs2_add_branch(handle, et, bh, last_eb_bh, 1565 ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
1575 meta_ac); 1566 meta_ac);
1576 if (ret < 0) { 1567 if (ret < 0) {
@@ -1645,8 +1636,9 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
1645 } 1636 }
1646 insert_index = i; 1637 insert_index = i;
1647 1638
1648 mlog(0, "ins %u: index %d, has_empty %d, next_free %d, count %d\n", 1639 trace_ocfs2_rotate_leaf(insert_cpos, insert_index,
1649 insert_cpos, insert_index, has_empty, next_free, le16_to_cpu(el->l_count)); 1640 has_empty, next_free,
1641 le16_to_cpu(el->l_count));
1650 1642
1651 BUG_ON(insert_index < 0); 1643 BUG_ON(insert_index < 0);
1652 BUG_ON(insert_index >= le16_to_cpu(el->l_count)); 1644 BUG_ON(insert_index >= le16_to_cpu(el->l_count));
@@ -2059,7 +2051,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2059 left_el = path_leaf_el(left_path); 2051 left_el = path_leaf_el(left_path);
2060 right_el = path_leaf_el(right_path); 2052 right_el = path_leaf_el(right_path);
2061 for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) { 2053 for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
2062 mlog(0, "Adjust records at index %u\n", i); 2054 trace_ocfs2_complete_edge_insert(i);
2063 2055
2064 /* 2056 /*
2065 * One nice property of knowing that all of these 2057 * One nice property of knowing that all of these
@@ -2389,7 +2381,9 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
2389 goto out; 2381 goto out;
2390 } 2382 }
2391 2383
2392 mlog(0, "Insert: %u, first left path cpos: %u\n", insert_cpos, cpos); 2384 trace_ocfs2_rotate_tree_right(
2385 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2386 insert_cpos, cpos);
2393 2387
2394 /* 2388 /*
2395 * What we want to do here is: 2389 * What we want to do here is:
@@ -2418,8 +2412,10 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
2418 * rotating subtrees. 2412 * rotating subtrees.
2419 */ 2413 */
2420 while (cpos && insert_cpos <= cpos) { 2414 while (cpos && insert_cpos <= cpos) {
2421 mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", 2415 trace_ocfs2_rotate_tree_right(
2422 insert_cpos, cpos); 2416 (unsigned long long)
2417 ocfs2_metadata_cache_owner(et->et_ci),
2418 insert_cpos, cpos);
2423 2419
2424 ret = ocfs2_find_path(et->et_ci, left_path, cpos); 2420 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
2425 if (ret) { 2421 if (ret) {
@@ -2461,10 +2457,10 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
2461 2457
2462 start = ocfs2_find_subtree_root(et, left_path, right_path); 2458 start = ocfs2_find_subtree_root(et, left_path, right_path);
2463 2459
2464 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2460 trace_ocfs2_rotate_subtree(start,
2465 start, 2461 (unsigned long long)
2466 (unsigned long long) right_path->p_node[start].bh->b_blocknr, 2462 right_path->p_node[start].bh->b_blocknr,
2467 right_path->p_tree_depth); 2463 right_path->p_tree_depth);
2468 2464
2469 ret = ocfs2_extend_rotate_transaction(handle, start, 2465 ret = ocfs2_extend_rotate_transaction(handle, start,
2470 orig_credits, right_path); 2466 orig_credits, right_path);
@@ -2964,8 +2960,7 @@ static int __ocfs2_rotate_tree_left(handle_t *handle,
2964 subtree_root = ocfs2_find_subtree_root(et, left_path, 2960 subtree_root = ocfs2_find_subtree_root(et, left_path,
2965 right_path); 2961 right_path);
2966 2962
2967 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2963 trace_ocfs2_rotate_subtree(subtree_root,
2968 subtree_root,
2969 (unsigned long long) 2964 (unsigned long long)
2970 right_path->p_node[subtree_root].bh->b_blocknr, 2965 right_path->p_node[subtree_root].bh->b_blocknr,
2971 right_path->p_tree_depth); 2966 right_path->p_tree_depth);
@@ -3989,9 +3984,11 @@ static int ocfs2_append_rec_to_path(handle_t *handle,
3989 goto out; 3984 goto out;
3990 } 3985 }
3991 3986
3992 mlog(0, "Append may need a left path update. cpos: %u, " 3987 trace_ocfs2_append_rec_to_path(
3993 "left_cpos: %u\n", le32_to_cpu(insert_rec->e_cpos), 3988 (unsigned long long)
3994 left_cpos); 3989 ocfs2_metadata_cache_owner(et->et_ci),
3990 le32_to_cpu(insert_rec->e_cpos),
3991 left_cpos);
3995 3992
3996 /* 3993 /*
3997 * No need to worry if the append is already in the 3994 * No need to worry if the append is already in the
@@ -4562,7 +4559,7 @@ static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
4562 ocfs2_et_get_last_eb_blk(et), 4559 ocfs2_et_get_last_eb_blk(et),
4563 &bh); 4560 &bh);
4564 if (ret) { 4561 if (ret) {
4565 mlog_exit(ret); 4562 mlog_errno(ret);
4566 goto out; 4563 goto out;
4567 } 4564 }
4568 eb = (struct ocfs2_extent_block *) bh->b_data; 4565 eb = (struct ocfs2_extent_block *) bh->b_data;
@@ -4678,9 +4675,9 @@ int ocfs2_insert_extent(handle_t *handle,
4678 struct ocfs2_insert_type insert = {0, }; 4675 struct ocfs2_insert_type insert = {0, };
4679 struct ocfs2_extent_rec rec; 4676 struct ocfs2_extent_rec rec;
4680 4677
4681 mlog(0, "add %u clusters at position %u to owner %llu\n", 4678 trace_ocfs2_insert_extent_start(
4682 new_clusters, cpos, 4679 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
4683 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); 4680 cpos, new_clusters);
4684 4681
4685 memset(&rec, 0, sizeof(rec)); 4682 memset(&rec, 0, sizeof(rec));
4686 rec.e_cpos = cpu_to_le32(cpos); 4683 rec.e_cpos = cpu_to_le32(cpos);
@@ -4700,11 +4697,9 @@ int ocfs2_insert_extent(handle_t *handle,
4700 goto bail; 4697 goto bail;
4701 } 4698 }
4702 4699
4703 mlog(0, "Insert.appending: %u, Insert.Contig: %u, " 4700 trace_ocfs2_insert_extent(insert.ins_appending, insert.ins_contig,
4704 "Insert.contig_index: %d, Insert.free_records: %d, " 4701 insert.ins_contig_index, free_records,
4705 "Insert.tree_depth: %d\n", 4702 insert.ins_tree_depth);
4706 insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
4707 free_records, insert.ins_tree_depth);
4708 4703
4709 if (insert.ins_contig == CONTIG_NONE && free_records == 0) { 4704 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
4710 status = ocfs2_grow_tree(handle, et, 4705 status = ocfs2_grow_tree(handle, et,
@@ -4726,7 +4721,6 @@ int ocfs2_insert_extent(handle_t *handle,
4726bail: 4721bail:
4727 brelse(last_eb_bh); 4722 brelse(last_eb_bh);
4728 4723
4729 mlog_exit(status);
4730 return status; 4724 return status;
4731} 4725}
4732 4726
@@ -4746,7 +4740,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4746 struct ocfs2_alloc_context *meta_ac, 4740 struct ocfs2_alloc_context *meta_ac,
4747 enum ocfs2_alloc_restarted *reason_ret) 4741 enum ocfs2_alloc_restarted *reason_ret)
4748{ 4742{
4749 int status = 0; 4743 int status = 0, err = 0;
4750 int free_extents; 4744 int free_extents;
4751 enum ocfs2_alloc_restarted reason = RESTART_NONE; 4745 enum ocfs2_alloc_restarted reason = RESTART_NONE;
4752 u32 bit_off, num_bits; 4746 u32 bit_off, num_bits;
@@ -4773,14 +4767,14 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4773 * 2) we are so fragmented, we've needed to add metadata too 4767 * 2) we are so fragmented, we've needed to add metadata too
4774 * many times. */ 4768 * many times. */
4775 if (!free_extents && !meta_ac) { 4769 if (!free_extents && !meta_ac) {
4776 mlog(0, "we haven't reserved any metadata!\n"); 4770 err = -1;
4777 status = -EAGAIN; 4771 status = -EAGAIN;
4778 reason = RESTART_META; 4772 reason = RESTART_META;
4779 goto leave; 4773 goto leave;
4780 } else if ((!free_extents) 4774 } else if ((!free_extents)
4781 && (ocfs2_alloc_context_bits_left(meta_ac) 4775 && (ocfs2_alloc_context_bits_left(meta_ac)
4782 < ocfs2_extend_meta_needed(et->et_root_el))) { 4776 < ocfs2_extend_meta_needed(et->et_root_el))) {
4783 mlog(0, "filesystem is really fragmented...\n"); 4777 err = -2;
4784 status = -EAGAIN; 4778 status = -EAGAIN;
4785 reason = RESTART_META; 4779 reason = RESTART_META;
4786 goto leave; 4780 goto leave;
@@ -4805,9 +4799,9 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4805 } 4799 }
4806 4800
4807 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 4801 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4808 mlog(0, "Allocating %u clusters at block %u for owner %llu\n", 4802 trace_ocfs2_add_clusters_in_btree(
4809 num_bits, bit_off, 4803 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
4810 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); 4804 bit_off, num_bits);
4811 status = ocfs2_insert_extent(handle, et, *logical_offset, block, 4805 status = ocfs2_insert_extent(handle, et, *logical_offset, block,
4812 num_bits, flags, meta_ac); 4806 num_bits, flags, meta_ac);
4813 if (status < 0) { 4807 if (status < 0) {
@@ -4821,16 +4815,15 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4821 *logical_offset += num_bits; 4815 *logical_offset += num_bits;
4822 4816
4823 if (clusters_to_add) { 4817 if (clusters_to_add) {
4824 mlog(0, "need to alloc once more, wanted = %u\n", 4818 err = clusters_to_add;
4825 clusters_to_add);
4826 status = -EAGAIN; 4819 status = -EAGAIN;
4827 reason = RESTART_TRANS; 4820 reason = RESTART_TRANS;
4828 } 4821 }
4829 4822
4830leave: 4823leave:
4831 mlog_exit(status);
4832 if (reason_ret) 4824 if (reason_ret)
4833 *reason_ret = reason; 4825 *reason_ret = reason;
4826 trace_ocfs2_add_clusters_in_btree_ret(status, reason, err);
4834 return status; 4827 return status;
4835} 4828}
4836 4829
@@ -5039,7 +5032,7 @@ int ocfs2_split_extent(handle_t *handle,
5039 ocfs2_et_get_last_eb_blk(et), 5032 ocfs2_et_get_last_eb_blk(et),
5040 &last_eb_bh); 5033 &last_eb_bh);
5041 if (ret) { 5034 if (ret) {
5042 mlog_exit(ret); 5035 mlog_errno(ret);
5043 goto out; 5036 goto out;
5044 } 5037 }
5045 5038
@@ -5056,9 +5049,9 @@ int ocfs2_split_extent(handle_t *handle,
5056 5049
5057 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); 5050 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
5058 5051
5059 mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n", 5052 trace_ocfs2_split_extent(split_index, ctxt.c_contig_type,
5060 split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent, 5053 ctxt.c_has_empty_extent,
5061 ctxt.c_split_covers_rec); 5054 ctxt.c_split_covers_rec);
5062 5055
5063 if (ctxt.c_contig_type == CONTIG_NONE) { 5056 if (ctxt.c_contig_type == CONTIG_NONE) {
5064 if (ctxt.c_split_covers_rec) 5057 if (ctxt.c_split_covers_rec)
@@ -5192,8 +5185,9 @@ int ocfs2_mark_extent_written(struct inode *inode,
5192{ 5185{
5193 int ret; 5186 int ret;
5194 5187
5195 mlog(0, "Inode %lu cpos %u, len %u, phys clusters %u\n", 5188 trace_ocfs2_mark_extent_written(
5196 inode->i_ino, cpos, len, phys); 5189 (unsigned long long)OCFS2_I(inode)->ip_blkno,
5190 cpos, len, phys);
5197 5191
5198 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { 5192 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
5199 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " 5193 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
@@ -5512,11 +5506,10 @@ int ocfs2_remove_extent(handle_t *handle,
5512 5506
5513 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); 5507 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
5514 5508
5515 mlog(0, "Owner %llu, remove (cpos %u, len %u). Existing index %d " 5509 trace_ocfs2_remove_extent(
5516 "(cpos %u, len %u)\n", 5510 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5517 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), 5511 cpos, len, index, le32_to_cpu(rec->e_cpos),
5518 cpos, len, index, 5512 ocfs2_rec_clusters(el, rec));
5519 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
5520 5513
5521 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { 5514 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
5522 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc, 5515 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
@@ -5795,9 +5788,6 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5795 struct ocfs2_dinode *di; 5788 struct ocfs2_dinode *di;
5796 struct ocfs2_truncate_log *tl; 5789 struct ocfs2_truncate_log *tl;
5797 5790
5798 mlog_entry("start_blk = %llu, num_clusters = %u\n",
5799 (unsigned long long)start_blk, num_clusters);
5800
5801 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5791 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
5802 5792
5803 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 5793 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
@@ -5834,10 +5824,9 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5834 goto bail; 5824 goto bail;
5835 } 5825 }
5836 5826
5837 mlog(0, "Log truncate of %u clusters starting at cluster %u to " 5827 trace_ocfs2_truncate_log_append(
5838 "%llu (index = %d)\n", num_clusters, start_cluster, 5828 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index,
5839 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index); 5829 start_cluster, num_clusters);
5840
5841 if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) { 5830 if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
5842 /* 5831 /*
5843 * Move index back to the record we are coalescing with. 5832 * Move index back to the record we are coalescing with.
@@ -5846,9 +5835,10 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5846 index--; 5835 index--;
5847 5836
5848 num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters); 5837 num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
5849 mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n", 5838 trace_ocfs2_truncate_log_append(
5850 index, le32_to_cpu(tl->tl_recs[index].t_start), 5839 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
5851 num_clusters); 5840 index, le32_to_cpu(tl->tl_recs[index].t_start),
5841 num_clusters);
5852 } else { 5842 } else {
5853 tl->tl_recs[index].t_start = cpu_to_le32(start_cluster); 5843 tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
5854 tl->tl_used = cpu_to_le16(index + 1); 5844 tl->tl_used = cpu_to_le16(index + 1);
@@ -5859,7 +5849,6 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5859 5849
5860 osb->truncated_clusters += num_clusters; 5850 osb->truncated_clusters += num_clusters;
5861bail: 5851bail:
5862 mlog_exit(status);
5863 return status; 5852 return status;
5864} 5853}
5865 5854
@@ -5878,8 +5867,6 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5878 struct inode *tl_inode = osb->osb_tl_inode; 5867 struct inode *tl_inode = osb->osb_tl_inode;
5879 struct buffer_head *tl_bh = osb->osb_tl_bh; 5868 struct buffer_head *tl_bh = osb->osb_tl_bh;
5880 5869
5881 mlog_entry_void();
5882
5883 di = (struct ocfs2_dinode *) tl_bh->b_data; 5870 di = (struct ocfs2_dinode *) tl_bh->b_data;
5884 tl = &di->id2.i_dealloc; 5871 tl = &di->id2.i_dealloc;
5885 i = le16_to_cpu(tl->tl_used) - 1; 5872 i = le16_to_cpu(tl->tl_used) - 1;
@@ -5915,8 +5902,9 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5915 /* if start_blk is not set, we ignore the record as 5902 /* if start_blk is not set, we ignore the record as
5916 * invalid. */ 5903 * invalid. */
5917 if (start_blk) { 5904 if (start_blk) {
5918 mlog(0, "free record %d, start = %u, clusters = %u\n", 5905 trace_ocfs2_replay_truncate_records(
5919 i, le32_to_cpu(rec.t_start), num_clusters); 5906 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
5907 i, le32_to_cpu(rec.t_start), num_clusters);
5920 5908
5921 status = ocfs2_free_clusters(handle, data_alloc_inode, 5909 status = ocfs2_free_clusters(handle, data_alloc_inode,
5922 data_alloc_bh, start_blk, 5910 data_alloc_bh, start_blk,
@@ -5932,7 +5920,6 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5932 osb->truncated_clusters = 0; 5920 osb->truncated_clusters = 0;
5933 5921
5934bail: 5922bail:
5935 mlog_exit(status);
5936 return status; 5923 return status;
5937} 5924}
5938 5925
@@ -5949,8 +5936,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
5949 struct ocfs2_dinode *di; 5936 struct ocfs2_dinode *di;
5950 struct ocfs2_truncate_log *tl; 5937 struct ocfs2_truncate_log *tl;
5951 5938
5952 mlog_entry_void();
5953
5954 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5939 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
5955 5940
5956 di = (struct ocfs2_dinode *) tl_bh->b_data; 5941 di = (struct ocfs2_dinode *) tl_bh->b_data;
@@ -5962,8 +5947,9 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
5962 5947
5963 tl = &di->id2.i_dealloc; 5948 tl = &di->id2.i_dealloc;
5964 num_to_flush = le16_to_cpu(tl->tl_used); 5949 num_to_flush = le16_to_cpu(tl->tl_used);
5965 mlog(0, "Flush %u records from truncate log #%llu\n", 5950 trace_ocfs2_flush_truncate_log(
5966 num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 5951 (unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
5952 num_to_flush);
5967 if (!num_to_flush) { 5953 if (!num_to_flush) {
5968 status = 0; 5954 status = 0;
5969 goto out; 5955 goto out;
@@ -6009,7 +5995,6 @@ out_mutex:
6009 iput(data_alloc_inode); 5995 iput(data_alloc_inode);
6010 5996
6011out: 5997out:
6012 mlog_exit(status);
6013 return status; 5998 return status;
6014} 5999}
6015 6000
@@ -6032,15 +6017,11 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
6032 container_of(work, struct ocfs2_super, 6017 container_of(work, struct ocfs2_super,
6033 osb_truncate_log_wq.work); 6018 osb_truncate_log_wq.work);
6034 6019
6035 mlog_entry_void();
6036
6037 status = ocfs2_flush_truncate_log(osb); 6020 status = ocfs2_flush_truncate_log(osb);
6038 if (status < 0) 6021 if (status < 0)
6039 mlog_errno(status); 6022 mlog_errno(status);
6040 else 6023 else
6041 ocfs2_init_steal_slots(osb); 6024 ocfs2_init_steal_slots(osb);
6042
6043 mlog_exit(status);
6044} 6025}
6045 6026
6046#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ) 6027#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
@@ -6086,7 +6067,6 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
6086 *tl_inode = inode; 6067 *tl_inode = inode;
6087 *tl_bh = bh; 6068 *tl_bh = bh;
6088bail: 6069bail:
6089 mlog_exit(status);
6090 return status; 6070 return status;
6091} 6071}
6092 6072
@@ -6106,7 +6086,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
6106 6086
6107 *tl_copy = NULL; 6087 *tl_copy = NULL;
6108 6088
6109 mlog(0, "recover truncate log from slot %d\n", slot_num); 6089 trace_ocfs2_begin_truncate_log_recovery(slot_num);
6110 6090
6111 status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh); 6091 status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
6112 if (status < 0) { 6092 if (status < 0) {
@@ -6123,8 +6103,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
6123 6103
6124 tl = &di->id2.i_dealloc; 6104 tl = &di->id2.i_dealloc;
6125 if (le16_to_cpu(tl->tl_used)) { 6105 if (le16_to_cpu(tl->tl_used)) {
6126 mlog(0, "We'll have %u logs to recover\n", 6106 trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
6127 le16_to_cpu(tl->tl_used));
6128 6107
6129 *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); 6108 *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
6130 if (!(*tl_copy)) { 6109 if (!(*tl_copy)) {
@@ -6157,9 +6136,9 @@ bail:
6157 if (status < 0 && (*tl_copy)) { 6136 if (status < 0 && (*tl_copy)) {
6158 kfree(*tl_copy); 6137 kfree(*tl_copy);
6159 *tl_copy = NULL; 6138 *tl_copy = NULL;
6139 mlog_errno(status);
6160 } 6140 }
6161 6141
6162 mlog_exit(status);
6163 return status; 6142 return status;
6164} 6143}
6165 6144
@@ -6174,8 +6153,6 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
6174 struct inode *tl_inode = osb->osb_tl_inode; 6153 struct inode *tl_inode = osb->osb_tl_inode;
6175 struct ocfs2_truncate_log *tl; 6154 struct ocfs2_truncate_log *tl;
6176 6155
6177 mlog_entry_void();
6178
6179 if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) { 6156 if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
6180 mlog(ML_ERROR, "Asked to recover my own truncate log!\n"); 6157 mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
6181 return -EINVAL; 6158 return -EINVAL;
@@ -6183,8 +6160,9 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
6183 6160
6184 tl = &tl_copy->id2.i_dealloc; 6161 tl = &tl_copy->id2.i_dealloc;
6185 num_recs = le16_to_cpu(tl->tl_used); 6162 num_recs = le16_to_cpu(tl->tl_used);
6186 mlog(0, "cleanup %u records from %llu\n", num_recs, 6163 trace_ocfs2_complete_truncate_log_recovery(
6187 (unsigned long long)le64_to_cpu(tl_copy->i_blkno)); 6164 (unsigned long long)le64_to_cpu(tl_copy->i_blkno),
6165 num_recs);
6188 6166
6189 mutex_lock(&tl_inode->i_mutex); 6167 mutex_lock(&tl_inode->i_mutex);
6190 for(i = 0; i < num_recs; i++) { 6168 for(i = 0; i < num_recs; i++) {
@@ -6219,7 +6197,6 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
6219bail_up: 6197bail_up:
6220 mutex_unlock(&tl_inode->i_mutex); 6198 mutex_unlock(&tl_inode->i_mutex);
6221 6199
6222 mlog_exit(status);
6223 return status; 6200 return status;
6224} 6201}
6225 6202
@@ -6228,8 +6205,6 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
6228 int status; 6205 int status;
6229 struct inode *tl_inode = osb->osb_tl_inode; 6206 struct inode *tl_inode = osb->osb_tl_inode;
6230 6207
6231 mlog_entry_void();
6232
6233 if (tl_inode) { 6208 if (tl_inode) {
6234 cancel_delayed_work(&osb->osb_truncate_log_wq); 6209 cancel_delayed_work(&osb->osb_truncate_log_wq);
6235 flush_workqueue(ocfs2_wq); 6210 flush_workqueue(ocfs2_wq);
@@ -6241,8 +6216,6 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
6241 brelse(osb->osb_tl_bh); 6216 brelse(osb->osb_tl_bh);
6242 iput(osb->osb_tl_inode); 6217 iput(osb->osb_tl_inode);
6243 } 6218 }
6244
6245 mlog_exit_void();
6246} 6219}
6247 6220
6248int ocfs2_truncate_log_init(struct ocfs2_super *osb) 6221int ocfs2_truncate_log_init(struct ocfs2_super *osb)
@@ -6251,8 +6224,6 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
6251 struct inode *tl_inode = NULL; 6224 struct inode *tl_inode = NULL;
6252 struct buffer_head *tl_bh = NULL; 6225 struct buffer_head *tl_bh = NULL;
6253 6226
6254 mlog_entry_void();
6255
6256 status = ocfs2_get_truncate_log_info(osb, 6227 status = ocfs2_get_truncate_log_info(osb,
6257 osb->slot_num, 6228 osb->slot_num,
6258 &tl_inode, 6229 &tl_inode,
@@ -6268,7 +6239,6 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
6268 osb->osb_tl_bh = tl_bh; 6239 osb->osb_tl_bh = tl_bh;
6269 osb->osb_tl_inode = tl_inode; 6240 osb->osb_tl_inode = tl_inode;
6270 6241
6271 mlog_exit(status);
6272 return status; 6242 return status;
6273} 6243}
6274 6244
@@ -6350,8 +6320,8 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
6350 else 6320 else
6351 bg_blkno = ocfs2_which_suballoc_group(head->free_blk, 6321 bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
6352 head->free_bit); 6322 head->free_bit);
6353 mlog(0, "Free bit: (bit %u, blkno %llu)\n", 6323 trace_ocfs2_free_cached_blocks(
6354 head->free_bit, (unsigned long long)head->free_blk); 6324 (unsigned long long)head->free_blk, head->free_bit);
6355 6325
6356 ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, 6326 ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
6357 head->free_bit, bg_blkno, 1); 6327 head->free_bit, bg_blkno, 1);
@@ -6404,8 +6374,7 @@ int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6404 return ret; 6374 return ret;
6405 } 6375 }
6406 6376
6407 mlog(0, "Insert clusters: (bit %u, blk %llu)\n", 6377 trace_ocfs2_cache_cluster_dealloc((unsigned long long)blkno, bit);
6408 bit, (unsigned long long)blkno);
6409 6378
6410 item->free_blk = blkno; 6379 item->free_blk = blkno;
6411 item->free_bit = bit; 6380 item->free_bit = bit;
@@ -6480,8 +6449,8 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
6480 fl = ctxt->c_first_suballocator; 6449 fl = ctxt->c_first_suballocator;
6481 6450
6482 if (fl->f_first) { 6451 if (fl->f_first) {
6483 mlog(0, "Free items: (type %u, slot %d)\n", 6452 trace_ocfs2_run_deallocs(fl->f_inode_type,
6484 fl->f_inode_type, fl->f_slot); 6453 fl->f_slot);
6485 ret2 = ocfs2_free_cached_blocks(osb, 6454 ret2 = ocfs2_free_cached_blocks(osb,
6486 fl->f_inode_type, 6455 fl->f_inode_type,
6487 fl->f_slot, 6456 fl->f_slot,
@@ -6558,8 +6527,9 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6558 goto out; 6527 goto out;
6559 } 6528 }
6560 6529
6561 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", 6530 trace_ocfs2_cache_block_dealloc(type, slot,
6562 type, slot, bit, (unsigned long long)blkno); 6531 (unsigned long long)suballoc,
6532 (unsigned long long)blkno, bit);
6563 6533
6564 item->free_bg = suballoc; 6534 item->free_bg = suballoc;
6565 item->free_blk = blkno; 6535 item->free_blk = blkno;
@@ -7005,8 +6975,6 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
7005 struct ocfs2_extent_tree et; 6975 struct ocfs2_extent_tree et;
7006 struct ocfs2_cached_dealloc_ctxt dealloc; 6976 struct ocfs2_cached_dealloc_ctxt dealloc;
7007 6977
7008 mlog_entry_void();
7009
7010 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 6978 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7011 ocfs2_init_dealloc_ctxt(&dealloc); 6979 ocfs2_init_dealloc_ctxt(&dealloc);
7012 6980
@@ -7041,8 +7009,11 @@ start:
7041 goto bail; 7009 goto bail;
7042 } 7010 }
7043 7011
7044 mlog(0, "inode->ip_clusters = %u, tree_depth = %u\n", 7012 trace_ocfs2_commit_truncate(
7045 OCFS2_I(inode)->ip_clusters, path->p_tree_depth); 7013 (unsigned long long)OCFS2_I(inode)->ip_blkno,
7014 new_highest_cpos,
7015 OCFS2_I(inode)->ip_clusters,
7016 path->p_tree_depth);
7046 7017
7047 /* 7018 /*
7048 * By now, el will point to the extent list on the bottom most 7019 * By now, el will point to the extent list on the bottom most
@@ -7136,7 +7107,6 @@ bail:
7136 7107
7137 ocfs2_free_path(path); 7108 ocfs2_free_path(path);
7138 7109
7139 mlog_exit(status);
7140 return status; 7110 return status;
7141} 7111}
7142 7112
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1fbb0e20131b..ac97bca282d2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,7 +29,6 @@
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31 31
32#define MLOG_MASK_PREFIX ML_FILE_IO
33#include <cluster/masklog.h> 32#include <cluster/masklog.h>
34 33
35#include "ocfs2.h" 34#include "ocfs2.h"
@@ -45,6 +44,7 @@
45#include "super.h" 44#include "super.h"
46#include "symlink.h" 45#include "symlink.h"
47#include "refcounttree.h" 46#include "refcounttree.h"
47#include "ocfs2_trace.h"
48 48
49#include "buffer_head_io.h" 49#include "buffer_head_io.h"
50 50
@@ -59,8 +59,9 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
59 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 59 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
60 void *kaddr; 60 void *kaddr;
61 61
62 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, 62 trace_ocfs2_symlink_get_block(
63 (unsigned long long)iblock, bh_result, create); 63 (unsigned long long)OCFS2_I(inode)->ip_blkno,
64 (unsigned long long)iblock, bh_result, create);
64 65
65 BUG_ON(ocfs2_inode_is_fast_symlink(inode)); 66 BUG_ON(ocfs2_inode_is_fast_symlink(inode));
66 67
@@ -123,7 +124,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
123bail: 124bail:
124 brelse(bh); 125 brelse(bh);
125 126
126 mlog_exit(err);
127 return err; 127 return err;
128} 128}
129 129
@@ -136,8 +136,8 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
136 u64 p_blkno, count, past_eof; 136 u64 p_blkno, count, past_eof;
137 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 137 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
138 138
139 mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode, 139 trace_ocfs2_get_block((unsigned long long)OCFS2_I(inode)->ip_blkno,
140 (unsigned long long)iblock, bh_result, create); 140 (unsigned long long)iblock, bh_result, create);
141 141
142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) 142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
143 mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n", 143 mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",
@@ -199,8 +199,9 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
199 } 199 }
200 200
201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
202 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino, 202
203 (unsigned long long)past_eof); 203 trace_ocfs2_get_block_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
204 (unsigned long long)past_eof);
204 if (create && (iblock >= past_eof)) 205 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result); 206 set_buffer_new(bh_result);
206 207
@@ -208,7 +209,6 @@ bail:
208 if (err < 0) 209 if (err < 0)
209 err = -EIO; 210 err = -EIO;
210 211
211 mlog_exit(err);
212 return err; 212 return err;
213} 213}
214 214
@@ -278,7 +278,8 @@ static int ocfs2_readpage(struct file *file, struct page *page)
278 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; 278 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
279 int ret, unlock = 1; 279 int ret, unlock = 1;
280 280
281 mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); 281 trace_ocfs2_readpage((unsigned long long)oi->ip_blkno,
282 (page ? page->index : 0));
282 283
283 ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); 284 ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);
284 if (ret != 0) { 285 if (ret != 0) {
@@ -323,7 +324,6 @@ out_inode_unlock:
323out: 324out:
324 if (unlock) 325 if (unlock)
325 unlock_page(page); 326 unlock_page(page);
326 mlog_exit(ret);
327 return ret; 327 return ret;
328} 328}
329 329
@@ -396,15 +396,11 @@ out_unlock:
396 */ 396 */
397static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) 397static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
398{ 398{
399 int ret; 399 trace_ocfs2_writepage(
400 400 (unsigned long long)OCFS2_I(page->mapping->host)->ip_blkno,
401 mlog_entry("(0x%p)\n", page); 401 page->index);
402
403 ret = block_write_full_page(page, ocfs2_get_block, wbc);
404 402
405 mlog_exit(ret); 403 return block_write_full_page(page, ocfs2_get_block, wbc);
406
407 return ret;
408} 404}
409 405
410/* Taken from ext3. We don't necessarily need the full blown 406/* Taken from ext3. We don't necessarily need the full blown
@@ -450,7 +446,8 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
450 int err = 0; 446 int err = 0;
451 struct inode *inode = mapping->host; 447 struct inode *inode = mapping->host;
452 448
453 mlog_entry("(block = %llu)\n", (unsigned long long)block); 449 trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno,
450 (unsigned long long)block);
454 451
455 /* We don't need to lock journal system files, since they aren't 452 /* We don't need to lock journal system files, since they aren't
456 * accessed concurrently from multiple nodes. 453 * accessed concurrently from multiple nodes.
@@ -484,8 +481,6 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
484bail: 481bail:
485 status = err ? 0 : p_blkno; 482 status = err ? 0 : p_blkno;
486 483
487 mlog_exit((int)status);
488
489 return status; 484 return status;
490} 485}
491 486
@@ -616,9 +611,6 @@ static ssize_t ocfs2_direct_IO(int rw,
616{ 611{
617 struct file *file = iocb->ki_filp; 612 struct file *file = iocb->ki_filp;
618 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 613 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
619 int ret;
620
621 mlog_entry_void();
622 614
623 /* 615 /*
624 * Fallback to buffered I/O if we see an inode without 616 * Fallback to buffered I/O if we see an inode without
@@ -631,13 +623,10 @@ static ssize_t ocfs2_direct_IO(int rw,
631 if (i_size_read(inode) <= offset) 623 if (i_size_read(inode) <= offset)
632 return 0; 624 return 0;
633 625
634 ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 626 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
635 iov, offset, nr_segs, 627 iov, offset, nr_segs,
636 ocfs2_direct_IO_get_blocks, 628 ocfs2_direct_IO_get_blocks,
637 ocfs2_dio_end_io, NULL, 0); 629 ocfs2_dio_end_io, NULL, 0);
638
639 mlog_exit(ret);
640 return ret;
641} 630}
642 631
643static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, 632static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
@@ -1026,6 +1015,12 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
1026 ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos, 1015 ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,
1027 &cluster_start, &cluster_end); 1016 &cluster_start, &cluster_end);
1028 1017
1018 /* treat the write as new if the a hole/lseek spanned across
1019 * the page boundary.
1020 */
1021 new = new | ((i_size_read(inode) <= page_offset(page)) &&
1022 (page_offset(page) <= user_pos));
1023
1029 if (page == wc->w_target_page) { 1024 if (page == wc->w_target_page) {
1030 map_from = user_pos & (PAGE_CACHE_SIZE - 1); 1025 map_from = user_pos & (PAGE_CACHE_SIZE - 1);
1031 map_to = map_from + user_len; 1026 map_to = map_from + user_len;
@@ -1534,9 +1529,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
1534 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1529 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1535 struct ocfs2_dinode *di = NULL; 1530 struct ocfs2_dinode *di = NULL;
1536 1531
1537 mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n", 1532 trace_ocfs2_try_to_write_inline_data((unsigned long long)oi->ip_blkno,
1538 (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos, 1533 len, (unsigned long long)pos,
1539 oi->ip_dyn_features); 1534 oi->ip_dyn_features);
1540 1535
1541 /* 1536 /*
1542 * Handle inodes which already have inline data 1st. 1537 * Handle inodes which already have inline data 1st.
@@ -1739,6 +1734,13 @@ try_again:
1739 1734
1740 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; 1735 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1741 1736
1737 trace_ocfs2_write_begin_nolock(
1738 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1739 (long long)i_size_read(inode),
1740 le32_to_cpu(di->i_clusters),
1741 pos, len, flags, mmap_page,
1742 clusters_to_alloc, extents_to_split);
1743
1742 /* 1744 /*
1743 * We set w_target_from, w_target_to here so that 1745 * We set w_target_from, w_target_to here so that
1744 * ocfs2_write_end() knows which range in the target page to 1746 * ocfs2_write_end() knows which range in the target page to
@@ -1751,12 +1753,6 @@ try_again:
1751 * ocfs2_lock_allocators(). It greatly over-estimates 1753 * ocfs2_lock_allocators(). It greatly over-estimates
1752 * the work to be done. 1754 * the work to be done.
1753 */ 1755 */
1754 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
1755 " clusters_to_add = %u, extents_to_split = %u\n",
1756 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1757 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
1758 clusters_to_alloc, extents_to_split);
1759
1760 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), 1756 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
1761 wc->w_di_bh); 1757 wc->w_di_bh);
1762 ret = ocfs2_lock_allocators(inode, &et, 1758 ret = ocfs2_lock_allocators(inode, &et,
@@ -1938,8 +1934,8 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1938 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); 1934 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1939 kunmap_atomic(kaddr, KM_USER0); 1935 kunmap_atomic(kaddr, KM_USER0);
1940 1936
1941 mlog(0, "Data written to inode at offset %llu. " 1937 trace_ocfs2_write_end_inline(
1942 "id_count = %u, copied = %u, i_dyn_features = 0x%x\n", 1938 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1943 (unsigned long long)pos, *copied, 1939 (unsigned long long)pos, *copied,
1944 le16_to_cpu(di->id2.i_data.id_count), 1940 le16_to_cpu(di->id2.i_data.id_count),
1945 le16_to_cpu(di->i_dyn_features)); 1941 le16_to_cpu(di->i_dyn_features));
@@ -2043,7 +2039,6 @@ const struct address_space_operations ocfs2_aops = {
2043 .write_begin = ocfs2_write_begin, 2039 .write_begin = ocfs2_write_begin,
2044 .write_end = ocfs2_write_end, 2040 .write_end = ocfs2_write_end,
2045 .bmap = ocfs2_bmap, 2041 .bmap = ocfs2_bmap,
2046 .sync_page = block_sync_page,
2047 .direct_IO = ocfs2_direct_IO, 2042 .direct_IO = ocfs2_direct_IO,
2048 .invalidatepage = ocfs2_invalidatepage, 2043 .invalidatepage = ocfs2_invalidatepage,
2049 .releasepage = ocfs2_releasepage, 2044 .releasepage = ocfs2_releasepage,
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index f9d5d3ffc75a..5d18ad10c27f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -35,8 +35,8 @@
35#include "inode.h" 35#include "inode.h"
36#include "journal.h" 36#include "journal.h"
37#include "uptodate.h" 37#include "uptodate.h"
38
39#include "buffer_head_io.h" 38#include "buffer_head_io.h"
39#include "ocfs2_trace.h"
40 40
41/* 41/*
42 * Bits on bh->b_state used by ocfs2. 42 * Bits on bh->b_state used by ocfs2.
@@ -55,8 +55,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
55{ 55{
56 int ret = 0; 56 int ret = 0;
57 57
58 mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n", 58 trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci);
59 (unsigned long long)bh->b_blocknr, ci);
60 59
61 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); 60 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
62 BUG_ON(buffer_jbd(bh)); 61 BUG_ON(buffer_jbd(bh));
@@ -66,6 +65,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
66 * can get modified during recovery even if read-only. */ 65 * can get modified during recovery even if read-only. */
67 if (ocfs2_is_hard_readonly(osb)) { 66 if (ocfs2_is_hard_readonly(osb)) {
68 ret = -EROFS; 67 ret = -EROFS;
68 mlog_errno(ret);
69 goto out; 69 goto out;
70 } 70 }
71 71
@@ -91,11 +91,11 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
91 * uptodate. */ 91 * uptodate. */
92 ret = -EIO; 92 ret = -EIO;
93 put_bh(bh); 93 put_bh(bh);
94 mlog_errno(ret);
94 } 95 }
95 96
96 ocfs2_metadata_cache_io_unlock(ci); 97 ocfs2_metadata_cache_io_unlock(ci);
97out: 98out:
98 mlog_exit(ret);
99 return ret; 99 return ret;
100} 100}
101 101
@@ -106,10 +106,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
106 unsigned int i; 106 unsigned int i;
107 struct buffer_head *bh; 107 struct buffer_head *bh;
108 108
109 if (!nr) { 109 trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
110 mlog(ML_BH_IO, "No buffers will be read!\n"); 110
111 if (!nr)
111 goto bail; 112 goto bail;
112 }
113 113
114 for (i = 0 ; i < nr ; i++) { 114 for (i = 0 ; i < nr ; i++) {
115 if (bhs[i] == NULL) { 115 if (bhs[i] == NULL) {
@@ -123,10 +123,8 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
123 bh = bhs[i]; 123 bh = bhs[i];
124 124
125 if (buffer_jbd(bh)) { 125 if (buffer_jbd(bh)) {
126 mlog(ML_BH_IO, 126 trace_ocfs2_read_blocks_sync_jbd(
127 "trying to sync read a jbd " 127 (unsigned long long)bh->b_blocknr);
128 "managed bh (blocknr = %llu), skipping\n",
129 (unsigned long long)bh->b_blocknr);
130 continue; 128 continue;
131 } 129 }
132 130
@@ -186,8 +184,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
186 struct buffer_head *bh; 184 struct buffer_head *bh;
187 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 185 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
188 186
189 mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n", 187 trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
190 ci, (unsigned long long)block, nr, flags);
191 188
192 BUG_ON(!ci); 189 BUG_ON(!ci);
193 BUG_ON((flags & OCFS2_BH_READAHEAD) && 190 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
@@ -207,7 +204,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
207 } 204 }
208 205
209 if (nr == 0) { 206 if (nr == 0) {
210 mlog(ML_BH_IO, "No buffers will be read!\n");
211 status = 0; 207 status = 0;
212 goto bail; 208 goto bail;
213 } 209 }
@@ -251,8 +247,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
251 */ 247 */
252 248
253 if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) { 249 if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
254 mlog(ML_UPTODATE, 250 trace_ocfs2_read_blocks_from_disk(
255 "bh (%llu), owner %llu not uptodate\n",
256 (unsigned long long)bh->b_blocknr, 251 (unsigned long long)bh->b_blocknr,
257 (unsigned long long)ocfs2_metadata_cache_owner(ci)); 252 (unsigned long long)ocfs2_metadata_cache_owner(ci));
258 /* We're using ignore_cache here to say 253 /* We're using ignore_cache here to say
@@ -260,11 +255,10 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
260 ignore_cache = 1; 255 ignore_cache = 1;
261 } 256 }
262 257
258 trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr,
259 ignore_cache, buffer_jbd(bh), buffer_dirty(bh));
260
263 if (buffer_jbd(bh)) { 261 if (buffer_jbd(bh)) {
264 if (ignore_cache)
265 mlog(ML_BH_IO, "trying to sync read a jbd "
266 "managed bh (blocknr = %llu)\n",
267 (unsigned long long)bh->b_blocknr);
268 continue; 262 continue;
269 } 263 }
270 264
@@ -272,9 +266,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
272 if (buffer_dirty(bh)) { 266 if (buffer_dirty(bh)) {
273 /* This should probably be a BUG, or 267 /* This should probably be a BUG, or
274 * at least return an error. */ 268 * at least return an error. */
275 mlog(ML_BH_IO, "asking me to sync read a dirty "
276 "buffer! (blocknr = %llu)\n",
277 (unsigned long long)bh->b_blocknr);
278 continue; 269 continue;
279 } 270 }
280 271
@@ -367,14 +358,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
367 } 358 }
368 ocfs2_metadata_cache_io_unlock(ci); 359 ocfs2_metadata_cache_io_unlock(ci);
369 360
370 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 361 trace_ocfs2_read_blocks_end((unsigned long long)block, nr,
371 (unsigned long long)block, nr, 362 flags, ignore_cache);
372 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
373 flags);
374 363
375bail: 364bail:
376 365
377 mlog_exit(status);
378 return status; 366 return status;
379} 367}
380 368
@@ -408,13 +396,12 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
408 int ret = 0; 396 int ret = 0;
409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 397 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
410 398
411 mlog_entry_void();
412
413 BUG_ON(buffer_jbd(bh)); 399 BUG_ON(buffer_jbd(bh));
414 ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); 400 ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
415 401
416 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { 402 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
417 ret = -EROFS; 403 ret = -EROFS;
404 mlog_errno(ret);
418 goto out; 405 goto out;
419 } 406 }
420 407
@@ -434,9 +421,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
434 if (!buffer_uptodate(bh)) { 421 if (!buffer_uptodate(bh)) {
435 ret = -EIO; 422 ret = -EIO;
436 put_bh(bh); 423 put_bh(bh);
424 mlog_errno(ret);
437 } 425 }
438 426
439out: 427out:
440 mlog_exit(ret);
441 return ret; 428 return ret;
442} 429}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b108e863d8f6..2461eb3272ed 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -367,11 +367,7 @@ static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc,
367static void o2hb_wait_on_io(struct o2hb_region *reg, 367static void o2hb_wait_on_io(struct o2hb_region *reg,
368 struct o2hb_bio_wait_ctxt *wc) 368 struct o2hb_bio_wait_ctxt *wc)
369{ 369{
370 struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
371
372 blk_run_address_space(mapping);
373 o2hb_bio_wait_dec(wc, 1); 370 o2hb_bio_wait_dec(wc, 1);
374
375 wait_for_completion(&wc->wc_io_complete); 371 wait_for_completion(&wc->wc_io_complete);
376} 372}
377 373
@@ -1658,8 +1654,6 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
1658 struct o2hb_disk_slot *slot; 1654 struct o2hb_disk_slot *slot;
1659 struct o2hb_disk_heartbeat_block *hb_block; 1655 struct o2hb_disk_heartbeat_block *hb_block;
1660 1656
1661 mlog_entry_void();
1662
1663 ret = o2hb_read_slots(reg, reg->hr_blocks); 1657 ret = o2hb_read_slots(reg, reg->hr_blocks);
1664 if (ret) { 1658 if (ret) {
1665 mlog_errno(ret); 1659 mlog_errno(ret);
@@ -1681,7 +1675,6 @@ static int o2hb_populate_slot_data(struct o2hb_region *reg)
1681 } 1675 }
1682 1676
1683out: 1677out:
1684 mlog_exit(ret);
1685 return ret; 1678 return ret;
1686} 1679}
1687 1680
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 6c61771469af..07ac24fd9252 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -30,7 +30,7 @@
30 30
31struct mlog_bits mlog_and_bits = MLOG_BITS_RHS(MLOG_INITIAL_AND_MASK); 31struct mlog_bits mlog_and_bits = MLOG_BITS_RHS(MLOG_INITIAL_AND_MASK);
32EXPORT_SYMBOL_GPL(mlog_and_bits); 32EXPORT_SYMBOL_GPL(mlog_and_bits);
33struct mlog_bits mlog_not_bits = MLOG_BITS_RHS(MLOG_INITIAL_NOT_MASK); 33struct mlog_bits mlog_not_bits = MLOG_BITS_RHS(0);
34EXPORT_SYMBOL_GPL(mlog_not_bits); 34EXPORT_SYMBOL_GPL(mlog_not_bits);
35 35
36static ssize_t mlog_mask_show(u64 mask, char *buf) 36static ssize_t mlog_mask_show(u64 mask, char *buf)
@@ -80,8 +80,6 @@ struct mlog_attribute {
80} 80}
81 81
82static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { 82static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
83 define_mask(ENTRY),
84 define_mask(EXIT),
85 define_mask(TCP), 83 define_mask(TCP),
86 define_mask(MSG), 84 define_mask(MSG),
87 define_mask(SOCKET), 85 define_mask(SOCKET),
@@ -93,27 +91,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
93 define_mask(DLM_THREAD), 91 define_mask(DLM_THREAD),
94 define_mask(DLM_MASTER), 92 define_mask(DLM_MASTER),
95 define_mask(DLM_RECOVERY), 93 define_mask(DLM_RECOVERY),
96 define_mask(AIO),
97 define_mask(JOURNAL),
98 define_mask(DISK_ALLOC),
99 define_mask(SUPER),
100 define_mask(FILE_IO),
101 define_mask(EXTENT_MAP),
102 define_mask(DLM_GLUE), 94 define_mask(DLM_GLUE),
103 define_mask(BH_IO),
104 define_mask(UPTODATE),
105 define_mask(NAMEI),
106 define_mask(INODE),
107 define_mask(VOTE), 95 define_mask(VOTE),
108 define_mask(DCACHE),
109 define_mask(CONN), 96 define_mask(CONN),
110 define_mask(QUORUM), 97 define_mask(QUORUM),
111 define_mask(EXPORT),
112 define_mask(XATTR),
113 define_mask(QUOTA),
114 define_mask(REFCOUNT),
115 define_mask(BASTS), 98 define_mask(BASTS),
116 define_mask(RESERVATIONS),
117 define_mask(CLUSTER), 99 define_mask(CLUSTER),
118 define_mask(ERROR), 100 define_mask(ERROR),
119 define_mask(NOTICE), 101 define_mask(NOTICE),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 34d6544357d9..baa2b9ef7eef 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -82,41 +82,23 @@
82 82
83/* bits that are frequently given and infrequently matched in the low word */ 83/* bits that are frequently given and infrequently matched in the low word */
84/* NOTE: If you add a flag, you need to also update masklog.c! */ 84/* NOTE: If you add a flag, you need to also update masklog.c! */
85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */ 85#define ML_TCP 0x0000000000000001ULL /* net cluster/tcp.c */
86#define ML_EXIT 0x0000000000000002ULL /* func call exit */ 86#define ML_MSG 0x0000000000000002ULL /* net network messages */
87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ 87#define ML_SOCKET 0x0000000000000004ULL /* net socket lifetime */
88#define ML_MSG 0x0000000000000008ULL /* net network messages */ 88#define ML_HEARTBEAT 0x0000000000000008ULL /* hb all heartbeat tracking */
89#define ML_SOCKET 0x0000000000000010ULL /* net socket lifetime */ 89#define ML_HB_BIO 0x0000000000000010ULL /* hb io tracing */
90#define ML_HEARTBEAT 0x0000000000000020ULL /* hb all heartbeat tracking */ 90#define ML_DLMFS 0x0000000000000020ULL /* dlm user dlmfs */
91#define ML_HB_BIO 0x0000000000000040ULL /* hb io tracing */ 91#define ML_DLM 0x0000000000000040ULL /* dlm general debugging */
92#define ML_DLMFS 0x0000000000000080ULL /* dlm user dlmfs */ 92#define ML_DLM_DOMAIN 0x0000000000000080ULL /* dlm domain debugging */
93#define ML_DLM 0x0000000000000100ULL /* dlm general debugging */ 93#define ML_DLM_THREAD 0x0000000000000100ULL /* dlm domain thread */
94#define ML_DLM_DOMAIN 0x0000000000000200ULL /* dlm domain debugging */ 94#define ML_DLM_MASTER 0x0000000000000200ULL /* dlm master functions */
95#define ML_DLM_THREAD 0x0000000000000400ULL /* dlm domain thread */ 95#define ML_DLM_RECOVERY 0x0000000000000400ULL /* dlm master functions */
96#define ML_DLM_MASTER 0x0000000000000800ULL /* dlm master functions */ 96#define ML_DLM_GLUE 0x0000000000000800ULL /* ocfs2 dlm glue layer */
97#define ML_DLM_RECOVERY 0x0000000000001000ULL /* dlm master functions */ 97#define ML_VOTE 0x0000000000001000ULL /* ocfs2 node messaging */
98#define ML_AIO 0x0000000000002000ULL /* ocfs2 aio read and write */ 98#define ML_CONN 0x0000000000002000ULL /* net connection management */
99#define ML_JOURNAL 0x0000000000004000ULL /* ocfs2 journalling functions */ 99#define ML_QUORUM 0x0000000000004000ULL /* net connection quorum */
100#define ML_DISK_ALLOC 0x0000000000008000ULL /* ocfs2 disk allocation */ 100#define ML_BASTS 0x0000000000008000ULL /* dlmglue asts and basts */
101#define ML_SUPER 0x0000000000010000ULL /* ocfs2 mount / umount */ 101#define ML_CLUSTER 0x0000000000010000ULL /* cluster stack */
102#define ML_FILE_IO 0x0000000000020000ULL /* ocfs2 file I/O */
103#define ML_EXTENT_MAP 0x0000000000040000ULL /* ocfs2 extent map caching */
104#define ML_DLM_GLUE 0x0000000000080000ULL /* ocfs2 dlm glue layer */
105#define ML_BH_IO 0x0000000000100000ULL /* ocfs2 buffer I/O */
106#define ML_UPTODATE 0x0000000000200000ULL /* ocfs2 caching sequence #'s */
107#define ML_NAMEI 0x0000000000400000ULL /* ocfs2 directory / namespace */
108#define ML_INODE 0x0000000000800000ULL /* ocfs2 inode manipulation */
109#define ML_VOTE 0x0000000001000000ULL /* ocfs2 node messaging */
110#define ML_DCACHE 0x0000000002000000ULL /* ocfs2 dcache operations */
111#define ML_CONN 0x0000000004000000ULL /* net connection management */
112#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
113#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
117#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
118#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
119#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
120 102
121/* bits that are infrequently given and frequently matched in the high word */ 103/* bits that are infrequently given and frequently matched in the high word */
122#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */ 104#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
@@ -124,7 +106,6 @@
124#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */ 106#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
125 107
126#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 108#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
127#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
128#ifndef MLOG_MASK_PREFIX 109#ifndef MLOG_MASK_PREFIX
129#define MLOG_MASK_PREFIX 0 110#define MLOG_MASK_PREFIX 0
130#endif 111#endif
@@ -222,58 +203,6 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
222 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ 203 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
223} while (0) 204} while (0)
224 205
225#if defined(CONFIG_OCFS2_DEBUG_MASKLOG)
226#define mlog_entry(fmt, args...) do { \
227 mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \
228} while (0)
229
230#define mlog_entry_void() do { \
231 mlog(ML_ENTRY, "ENTRY:\n"); \
232} while (0)
233
234/*
235 * We disable this for sparse.
236 */
237#if !defined(__CHECKER__)
238#define mlog_exit(st) do { \
239 if (__builtin_types_compatible_p(typeof(st), unsigned long)) \
240 mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \
241 else if (__builtin_types_compatible_p(typeof(st), signed long)) \
242 mlog(ML_EXIT, "EXIT: %ld\n", (signed long) (st)); \
243 else if (__builtin_types_compatible_p(typeof(st), unsigned int) \
244 || __builtin_types_compatible_p(typeof(st), unsigned short) \
245 || __builtin_types_compatible_p(typeof(st), unsigned char)) \
246 mlog(ML_EXIT, "EXIT: %u\n", (unsigned int) (st)); \
247 else if (__builtin_types_compatible_p(typeof(st), signed int) \
248 || __builtin_types_compatible_p(typeof(st), signed short) \
249 || __builtin_types_compatible_p(typeof(st), signed char)) \
250 mlog(ML_EXIT, "EXIT: %d\n", (signed int) (st)); \
251 else if (__builtin_types_compatible_p(typeof(st), long long)) \
252 mlog(ML_EXIT, "EXIT: %lld\n", (long long) (st)); \
253 else \
254 mlog(ML_EXIT, "EXIT: %llu\n", (unsigned long long) (st)); \
255} while (0)
256#else
257#define mlog_exit(st) do { \
258 mlog(ML_EXIT, "EXIT: %lld\n", (long long) (st)); \
259} while (0)
260#endif
261
262#define mlog_exit_ptr(ptr) do { \
263 mlog(ML_EXIT, "EXIT: %p\n", ptr); \
264} while (0)
265
266#define mlog_exit_void() do { \
267 mlog(ML_EXIT, "EXIT\n"); \
268} while (0)
269#else
270#define mlog_entry(...) do { } while (0)
271#define mlog_entry_void(...) do { } while (0)
272#define mlog_exit(...) do { } while (0)
273#define mlog_exit_ptr(...) do { } while (0)
274#define mlog_exit_void(...) do { } while (0)
275#endif /* defined(CONFIG_OCFS2_DEBUG_MASKLOG) */
276
277#define mlog_bug_on_msg(cond, fmt, args...) do { \ 206#define mlog_bug_on_msg(cond, fmt, args...) do { \
278 if (cond) { \ 207 if (cond) { \
279 mlog(ML_ERROR, "bug expression: " #cond "\n"); \ 208 mlog(ML_ERROR, "bug expression: " #cond "\n"); \
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 3b11cb1e38fc..ee04ff5ee603 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -210,10 +210,6 @@ static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
210 sc->sc_tv_func_stop = ktime_get(); 210 sc->sc_tv_func_stop = ktime_get();
211} 211}
212 212
213static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
214{
215 return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
216}
217#else /* CONFIG_DEBUG_FS */ 213#else /* CONFIG_DEBUG_FS */
218# define o2net_init_nst(a, b, c, d, e) 214# define o2net_init_nst(a, b, c, d, e)
219# define o2net_set_nst_sock_time(a) 215# define o2net_set_nst_sock_time(a)
@@ -227,10 +223,14 @@ static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
227# define o2net_set_advance_stop_time(a) 223# define o2net_set_advance_stop_time(a)
228# define o2net_set_func_start_time(a) 224# define o2net_set_func_start_time(a)
229# define o2net_set_func_stop_time(a) 225# define o2net_set_func_stop_time(a)
230# define o2net_get_func_run_time(a) (ktime_t)0
231#endif /* CONFIG_DEBUG_FS */ 226#endif /* CONFIG_DEBUG_FS */
232 227
233#ifdef CONFIG_OCFS2_FS_STATS 228#ifdef CONFIG_OCFS2_FS_STATS
229static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
230{
231 return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
232}
233
234static void o2net_update_send_stats(struct o2net_send_tracking *nst, 234static void o2net_update_send_stats(struct o2net_send_tracking *nst,
235 struct o2net_sock_container *sc) 235 struct o2net_sock_container *sc)
236{ 236{
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 7eb90403fc8a..e5ba34818332 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30 30
31#define MLOG_MASK_PREFIX ML_DCACHE
32#include <cluster/masklog.h> 31#include <cluster/masklog.h>
33 32
34#include "ocfs2.h" 33#include "ocfs2.h"
@@ -39,6 +38,7 @@
39#include "file.h" 38#include "file.h"
40#include "inode.h" 39#include "inode.h"
41#include "super.h" 40#include "super.h"
41#include "ocfs2_trace.h"
42 42
43void ocfs2_dentry_attach_gen(struct dentry *dentry) 43void ocfs2_dentry_attach_gen(struct dentry *dentry)
44{ 44{
@@ -62,8 +62,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
63 osb = OCFS2_SB(dentry->d_sb); 63 osb = OCFS2_SB(dentry->d_sb);
64 64
65 mlog_entry("(0x%p, '%.*s')\n", dentry, 65 trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
66 dentry->d_name.len, dentry->d_name.name); 66 dentry->d_name.name);
67 67
68 /* For a negative dentry - 68 /* For a negative dentry -
69 * check the generation number of the parent and compare with the 69 * check the generation number of the parent and compare with the
@@ -73,9 +73,10 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
73 unsigned long gen = (unsigned long) dentry->d_fsdata; 73 unsigned long gen = (unsigned long) dentry->d_fsdata;
74 unsigned long pgen = 74 unsigned long pgen =
75 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; 75 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
76 mlog(0, "negative dentry: %.*s parent gen: %lu " 76
77 "dentry gen: %lu\n", 77 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
78 dentry->d_name.len, dentry->d_name.name, pgen, gen); 78 dentry->d_name.name,
79 pgen, gen);
79 if (gen != pgen) 80 if (gen != pgen)
80 goto bail; 81 goto bail;
81 goto valid; 82 goto valid;
@@ -90,8 +91,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
90 /* did we or someone else delete this inode? */ 91 /* did we or someone else delete this inode? */
91 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 92 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
92 spin_unlock(&OCFS2_I(inode)->ip_lock); 93 spin_unlock(&OCFS2_I(inode)->ip_lock);
93 mlog(0, "inode (%llu) deleted, returning false\n", 94 trace_ocfs2_dentry_revalidate_delete(
94 (unsigned long long)OCFS2_I(inode)->ip_blkno); 95 (unsigned long long)OCFS2_I(inode)->ip_blkno);
95 goto bail; 96 goto bail;
96 } 97 }
97 spin_unlock(&OCFS2_I(inode)->ip_lock); 98 spin_unlock(&OCFS2_I(inode)->ip_lock);
@@ -101,10 +102,9 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
101 * inode nlink hits zero, it never goes back. 102 * inode nlink hits zero, it never goes back.
102 */ 103 */
103 if (inode->i_nlink == 0) { 104 if (inode->i_nlink == 0) {
104 mlog(0, "Inode %llu orphaned, returning false " 105 trace_ocfs2_dentry_revalidate_orphaned(
105 "dir = %d\n", 106 (unsigned long long)OCFS2_I(inode)->ip_blkno,
106 (unsigned long long)OCFS2_I(inode)->ip_blkno, 107 S_ISDIR(inode->i_mode));
107 S_ISDIR(inode->i_mode));
108 goto bail; 108 goto bail;
109 } 109 }
110 110
@@ -113,9 +113,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
113 * redo it. 113 * redo it.
114 */ 114 */
115 if (!dentry->d_fsdata) { 115 if (!dentry->d_fsdata) {
116 mlog(0, "Inode %llu doesn't have dentry lock, " 116 trace_ocfs2_dentry_revalidate_nofsdata(
117 "returning false\n", 117 (unsigned long long)OCFS2_I(inode)->ip_blkno);
118 (unsigned long long)OCFS2_I(inode)->ip_blkno);
119 goto bail; 118 goto bail;
120 } 119 }
121 120
@@ -123,8 +122,7 @@ valid:
123 ret = 1; 122 ret = 1;
124 123
125bail: 124bail:
126 mlog_exit(ret); 125 trace_ocfs2_dentry_revalidate_ret(ret);
127
128 return ret; 126 return ret;
129} 127}
130 128
@@ -181,8 +179,8 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
181 179
182 spin_lock(&dentry->d_lock); 180 spin_lock(&dentry->d_lock);
183 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 181 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
184 mlog(0, "dentry found: %.*s\n", 182 trace_ocfs2_find_local_alias(dentry->d_name.len,
185 dentry->d_name.len, dentry->d_name.name); 183 dentry->d_name.name);
186 184
187 dget_dlock(dentry); 185 dget_dlock(dentry);
188 spin_unlock(&dentry->d_lock); 186 spin_unlock(&dentry->d_lock);
@@ -240,9 +238,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
240 struct dentry *alias; 238 struct dentry *alias;
241 struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 239 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
242 240
243 mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n", 241 trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name,
244 dentry->d_name.len, dentry->d_name.name, 242 (unsigned long long)parent_blkno, dl);
245 (unsigned long long)parent_blkno, dl);
246 243
247 /* 244 /*
248 * Negative dentry. We ignore these for now. 245 * Negative dentry. We ignore these for now.
@@ -292,7 +289,9 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
292 (unsigned long long)parent_blkno, 289 (unsigned long long)parent_blkno,
293 (unsigned long long)dl->dl_parent_blkno); 290 (unsigned long long)dl->dl_parent_blkno);
294 291
295 mlog(0, "Found: %s\n", dl->dl_lockres.l_name); 292 trace_ocfs2_dentry_attach_lock_found(dl->dl_lockres.l_name,
293 (unsigned long long)parent_blkno,
294 (unsigned long long)OCFS2_I(inode)->ip_blkno);
296 295
297 goto out_attach; 296 goto out_attach;
298 } 297 }
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f97b6f1c61dd..9fe5b8fd658f 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -43,7 +43,6 @@
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44#include <linux/sort.h> 44#include <linux/sort.h>
45 45
46#define MLOG_MASK_PREFIX ML_NAMEI
47#include <cluster/masklog.h> 46#include <cluster/masklog.h>
48 47
49#include "ocfs2.h" 48#include "ocfs2.h"
@@ -61,6 +60,7 @@
61#include "super.h" 60#include "super.h"
62#include "sysfile.h" 61#include "sysfile.h"
63#include "uptodate.h" 62#include "uptodate.h"
63#include "ocfs2_trace.h"
64 64
65#include "buffer_head_io.h" 65#include "buffer_head_io.h"
66 66
@@ -322,21 +322,23 @@ static int ocfs2_check_dir_entry(struct inode * dir,
322 const char *error_msg = NULL; 322 const char *error_msg = NULL;
323 const int rlen = le16_to_cpu(de->rec_len); 323 const int rlen = le16_to_cpu(de->rec_len);
324 324
325 if (rlen < OCFS2_DIR_REC_LEN(1)) 325 if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
326 error_msg = "rec_len is smaller than minimal"; 326 error_msg = "rec_len is smaller than minimal";
327 else if (rlen % 4 != 0) 327 else if (unlikely(rlen % 4 != 0))
328 error_msg = "rec_len % 4 != 0"; 328 error_msg = "rec_len % 4 != 0";
329 else if (rlen < OCFS2_DIR_REC_LEN(de->name_len)) 329 else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
330 error_msg = "rec_len is too small for name_len"; 330 error_msg = "rec_len is too small for name_len";
331 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) 331 else if (unlikely(
332 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
332 error_msg = "directory entry across blocks"; 333 error_msg = "directory entry across blocks";
333 334
334 if (error_msg != NULL) 335 if (unlikely(error_msg != NULL))
335 mlog(ML_ERROR, "bad entry in directory #%llu: %s - " 336 mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
336 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n", 337 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
337 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg, 338 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
338 offset, (unsigned long long)le64_to_cpu(de->inode), rlen, 339 offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
339 de->name_len); 340 de->name_len);
341
340 return error_msg == NULL ? 1 : 0; 342 return error_msg == NULL ? 1 : 0;
341} 343}
342 344
@@ -367,8 +369,6 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
367 int de_len; 369 int de_len;
368 int ret = 0; 370 int ret = 0;
369 371
370 mlog_entry_void();
371
372 de_buf = first_de; 372 de_buf = first_de;
373 dlimit = de_buf + bytes; 373 dlimit = de_buf + bytes;
374 374
@@ -402,7 +402,7 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
402 } 402 }
403 403
404bail: 404bail:
405 mlog_exit(ret); 405 trace_ocfs2_search_dirblock(ret);
406 return ret; 406 return ret;
407} 407}
408 408
@@ -447,8 +447,7 @@ static int ocfs2_validate_dir_block(struct super_block *sb,
447 * We don't validate dirents here, that's handled 447 * We don't validate dirents here, that's handled
448 * in-place when the code walks them. 448 * in-place when the code walks them.
449 */ 449 */
450 mlog(0, "Validating dirblock %llu\n", 450 trace_ocfs2_validate_dir_block((unsigned long long)bh->b_blocknr);
451 (unsigned long long)bh->b_blocknr);
452 451
453 BUG_ON(!buffer_uptodate(bh)); 452 BUG_ON(!buffer_uptodate(bh));
454 453
@@ -706,8 +705,6 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
706 int num = 0; 705 int num = 0;
707 int nblocks, i, err; 706 int nblocks, i, err;
708 707
709 mlog_entry_void();
710
711 sb = dir->i_sb; 708 sb = dir->i_sb;
712 709
713 nblocks = i_size_read(dir) >> sb->s_blocksize_bits; 710 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
@@ -788,7 +785,7 @@ cleanup_and_exit:
788 for (; ra_ptr < ra_max; ra_ptr++) 785 for (; ra_ptr < ra_max; ra_ptr++)
789 brelse(bh_use[ra_ptr]); 786 brelse(bh_use[ra_ptr]);
790 787
791 mlog_exit_ptr(ret); 788 trace_ocfs2_find_entry_el(ret);
792 return ret; 789 return ret;
793} 790}
794 791
@@ -950,11 +947,9 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
950 goto out; 947 goto out;
951 } 948 }
952 949
953 mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x " 950 trace_ocfs2_dx_dir_search((unsigned long long)OCFS2_I(dir)->ip_blkno,
954 "returns: %llu\n", 951 namelen, name, hinfo->major_hash,
955 (unsigned long long)OCFS2_I(dir)->ip_blkno, 952 hinfo->minor_hash, (unsigned long long)phys);
956 namelen, name, hinfo->major_hash, hinfo->minor_hash,
957 (unsigned long long)phys);
958 953
959 ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh); 954 ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh);
960 if (ret) { 955 if (ret) {
@@ -964,9 +959,9 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
964 959
965 dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data; 960 dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
966 961
967 mlog(0, "leaf info: num_used: %d, count: %d\n", 962 trace_ocfs2_dx_dir_search_leaf_info(
968 le16_to_cpu(dx_leaf->dl_list.de_num_used), 963 le16_to_cpu(dx_leaf->dl_list.de_num_used),
969 le16_to_cpu(dx_leaf->dl_list.de_count)); 964 le16_to_cpu(dx_leaf->dl_list.de_count));
970 965
971 entry_list = &dx_leaf->dl_list; 966 entry_list = &dx_leaf->dl_list;
972 967
@@ -1166,8 +1161,6 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1166 int i, status = -ENOENT; 1161 int i, status = -ENOENT;
1167 ocfs2_journal_access_func access = ocfs2_journal_access_db; 1162 ocfs2_journal_access_func access = ocfs2_journal_access_db;
1168 1163
1169 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1170
1171 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 1164 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1172 access = ocfs2_journal_access_di; 1165 access = ocfs2_journal_access_di;
1173 1166
@@ -1202,7 +1195,6 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1202 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len)); 1195 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1203 } 1196 }
1204bail: 1197bail:
1205 mlog_exit(status);
1206 return status; 1198 return status;
1207} 1199}
1208 1200
@@ -1348,8 +1340,8 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1348 } 1340 }
1349 } 1341 }
1350 1342
1351 mlog(0, "Dir %llu: delete entry at index: %d\n", 1343 trace_ocfs2_delete_entry_dx((unsigned long long)OCFS2_I(dir)->ip_blkno,
1352 (unsigned long long)OCFS2_I(dir)->ip_blkno, index); 1344 index);
1353 1345
1354 ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry, 1346 ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
1355 leaf_bh, leaf_bh->b_data, leaf_bh->b_size); 1347 leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
@@ -1632,8 +1624,6 @@ int __ocfs2_add_entry(handle_t *handle,
1632 struct buffer_head *insert_bh = lookup->dl_leaf_bh; 1624 struct buffer_head *insert_bh = lookup->dl_leaf_bh;
1633 char *data_start = insert_bh->b_data; 1625 char *data_start = insert_bh->b_data;
1634 1626
1635 mlog_entry_void();
1636
1637 if (!namelen) 1627 if (!namelen)
1638 return -EINVAL; 1628 return -EINVAL;
1639 1629
@@ -1765,8 +1755,9 @@ int __ocfs2_add_entry(handle_t *handle,
1765 * from ever getting here. */ 1755 * from ever getting here. */
1766 retval = -ENOSPC; 1756 retval = -ENOSPC;
1767bail: 1757bail:
1758 if (retval)
1759 mlog_errno(retval);
1768 1760
1769 mlog_exit(retval);
1770 return retval; 1761 return retval;
1771} 1762}
1772 1763
@@ -2028,8 +2019,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
2028 struct inode *inode = filp->f_path.dentry->d_inode; 2019 struct inode *inode = filp->f_path.dentry->d_inode;
2029 int lock_level = 0; 2020 int lock_level = 0;
2030 2021
2031 mlog_entry("dirino=%llu\n", 2022 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
2032 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2033 2023
2034 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2024 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
2035 if (lock_level && error >= 0) { 2025 if (lock_level && error >= 0) {
@@ -2051,9 +2041,10 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
2051 dirent, filldir, NULL); 2041 dirent, filldir, NULL);
2052 2042
2053 ocfs2_inode_unlock(inode, lock_level); 2043 ocfs2_inode_unlock(inode, lock_level);
2044 if (error)
2045 mlog_errno(error);
2054 2046
2055bail_nolock: 2047bail_nolock:
2056 mlog_exit(error);
2057 2048
2058 return error; 2049 return error;
2059} 2050}
@@ -2069,8 +2060,8 @@ int ocfs2_find_files_on_disk(const char *name,
2069{ 2060{
2070 int status = -ENOENT; 2061 int status = -ENOENT;
2071 2062
2072 mlog(0, "name=%.*s, blkno=%p, inode=%llu\n", namelen, name, blkno, 2063 trace_ocfs2_find_files_on_disk(namelen, name, blkno,
2073 (unsigned long long)OCFS2_I(inode)->ip_blkno); 2064 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2074 2065
2075 status = ocfs2_find_entry(name, namelen, inode, lookup); 2066 status = ocfs2_find_entry(name, namelen, inode, lookup);
2076 if (status) 2067 if (status)
@@ -2114,8 +2105,8 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
2114 int ret; 2105 int ret;
2115 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2106 struct ocfs2_dir_lookup_result lookup = { NULL, };
2116 2107
2117 mlog_entry("dir %llu, name '%.*s'\n", 2108 trace_ocfs2_check_dir_for_entry(
2118 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); 2109 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
2119 2110
2120 ret = -EEXIST; 2111 ret = -EEXIST;
2121 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) 2112 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
@@ -2125,7 +2116,8 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
2125bail: 2116bail:
2126 ocfs2_free_dir_lookup_result(&lookup); 2117 ocfs2_free_dir_lookup_result(&lookup);
2127 2118
2128 mlog_exit(ret); 2119 if (ret)
2120 mlog_errno(ret);
2129 return ret; 2121 return ret;
2130} 2122}
2131 2123
@@ -2324,8 +2316,6 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2324 struct buffer_head *new_bh = NULL; 2316 struct buffer_head *new_bh = NULL;
2325 struct ocfs2_dir_entry *de; 2317 struct ocfs2_dir_entry *de;
2326 2318
2327 mlog_entry_void();
2328
2329 if (ocfs2_new_dir_wants_trailer(inode)) 2319 if (ocfs2_new_dir_wants_trailer(inode))
2330 size = ocfs2_dir_trailer_blk_off(parent->i_sb); 2320 size = ocfs2_dir_trailer_blk_off(parent->i_sb);
2331 2321
@@ -2380,7 +2370,6 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2380bail: 2370bail:
2381 brelse(new_bh); 2371 brelse(new_bh);
2382 2372
2383 mlog_exit(status);
2384 return status; 2373 return status;
2385} 2374}
2386 2375
@@ -2409,9 +2398,9 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2409 goto out; 2398 goto out;
2410 } 2399 }
2411 2400
2412 mlog(0, "Dir %llu, attach new index block: %llu\n", 2401 trace_ocfs2_dx_dir_attach_index(
2413 (unsigned long long)OCFS2_I(dir)->ip_blkno, 2402 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2414 (unsigned long long)dr_blkno); 2403 (unsigned long long)dr_blkno);
2415 2404
2416 dx_root_bh = sb_getblk(osb->sb, dr_blkno); 2405 dx_root_bh = sb_getblk(osb->sb, dr_blkno);
2417 if (dx_root_bh == NULL) { 2406 if (dx_root_bh == NULL) {
@@ -2511,11 +2500,10 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
2511 dx_leaf->dl_list.de_count = 2500 dx_leaf->dl_list.de_count =
2512 cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb)); 2501 cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb));
2513 2502
2514 mlog(0, 2503 trace_ocfs2_dx_dir_format_cluster(
2515 "Dir %llu, format dx_leaf: %llu, entry count: %u\n", 2504 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2516 (unsigned long long)OCFS2_I(dir)->ip_blkno, 2505 (unsigned long long)bh->b_blocknr,
2517 (unsigned long long)bh->b_blocknr, 2506 le16_to_cpu(dx_leaf->dl_list.de_count));
2518 le16_to_cpu(dx_leaf->dl_list.de_count));
2519 2507
2520 ocfs2_journal_dirty(handle, bh); 2508 ocfs2_journal_dirty(handle, bh);
2521 } 2509 }
@@ -2759,12 +2747,11 @@ static void ocfs2_dx_dir_index_root_block(struct inode *dir,
2759 2747
2760 ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo); 2748 ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
2761 2749
2762 mlog(0, 2750 trace_ocfs2_dx_dir_index_root_block(
2763 "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n", 2751 (unsigned long long)dir->i_ino,
2764 (unsigned long long)dir->i_ino, hinfo.major_hash, 2752 hinfo.major_hash, hinfo.minor_hash,
2765 hinfo.minor_hash, 2753 de->name_len, de->name,
2766 le16_to_cpu(dx_root->dr_entries.de_num_used), 2754 le16_to_cpu(dx_root->dr_entries.de_num_used));
2767 de->name_len, de->name);
2768 2755
2769 ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo, 2756 ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
2770 dirent_blk); 2757 dirent_blk);
@@ -3235,7 +3222,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
3235bail: 3222bail:
3236 if (did_quota && status < 0) 3223 if (did_quota && status < 0)
3237 dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); 3224 dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
3238 mlog_exit(status);
3239 return status; 3225 return status;
3240} 3226}
3241 3227
@@ -3270,8 +3256,6 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3270 struct ocfs2_extent_tree et; 3256 struct ocfs2_extent_tree et;
3271 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; 3257 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
3272 3258
3273 mlog_entry_void();
3274
3275 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 3259 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
3276 /* 3260 /*
3277 * This would be a code error as an inline directory should 3261 * This would be a code error as an inline directory should
@@ -3320,8 +3304,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3320 down_write(&OCFS2_I(dir)->ip_alloc_sem); 3304 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3321 drop_alloc_sem = 1; 3305 drop_alloc_sem = 1;
3322 dir_i_size = i_size_read(dir); 3306 dir_i_size = i_size_read(dir);
3323 mlog(0, "extending dir %llu (i_size = %lld)\n", 3307 trace_ocfs2_extend_dir((unsigned long long)OCFS2_I(dir)->ip_blkno,
3324 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); 3308 dir_i_size);
3325 3309
3326 /* dir->i_size is always block aligned. */ 3310 /* dir->i_size is always block aligned. */
3327 spin_lock(&OCFS2_I(dir)->ip_lock); 3311 spin_lock(&OCFS2_I(dir)->ip_lock);
@@ -3436,7 +3420,6 @@ bail:
3436 3420
3437 brelse(new_bh); 3421 brelse(new_bh);
3438 3422
3439 mlog_exit(status);
3440 return status; 3423 return status;
3441} 3424}
3442 3425
@@ -3583,8 +3566,9 @@ next:
3583 status = 0; 3566 status = 0;
3584bail: 3567bail:
3585 brelse(bh); 3568 brelse(bh);
3569 if (status)
3570 mlog_errno(status);
3586 3571
3587 mlog_exit(status);
3588 return status; 3572 return status;
3589} 3573}
3590 3574
@@ -3815,9 +3799,9 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3815 struct ocfs2_dx_root_block *dx_root; 3799 struct ocfs2_dx_root_block *dx_root;
3816 struct ocfs2_dx_leaf *tmp_dx_leaf = NULL; 3800 struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;
3817 3801
3818 mlog(0, "DX Dir: %llu, rebalance leaf leaf_blkno: %llu insert: %u\n", 3802 trace_ocfs2_dx_dir_rebalance((unsigned long long)OCFS2_I(dir)->ip_blkno,
3819 (unsigned long long)OCFS2_I(dir)->ip_blkno, 3803 (unsigned long long)leaf_blkno,
3820 (unsigned long long)leaf_blkno, insert_hash); 3804 insert_hash);
3821 3805
3822 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh); 3806 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
3823 3807
@@ -3897,8 +3881,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3897 goto out_commit; 3881 goto out_commit;
3898 } 3882 }
3899 3883
3900 mlog(0, "Split leaf (%u) at %u, insert major hash is %u\n", 3884 trace_ocfs2_dx_dir_rebalance_split(leaf_cpos, split_hash, insert_hash);
3901 leaf_cpos, split_hash, insert_hash);
3902 3885
3903 /* 3886 /*
3904 * We have to carefully order operations here. There are items 3887 * We have to carefully order operations here. There are items
@@ -4355,8 +4338,8 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
4355 unsigned int blocks_wanted = 1; 4338 unsigned int blocks_wanted = 1;
4356 struct buffer_head *bh = NULL; 4339 struct buffer_head *bh = NULL;
4357 4340
4358 mlog(0, "getting ready to insert namelen %d into dir %llu\n", 4341 trace_ocfs2_prepare_dir_for_insert(
4359 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); 4342 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen);
4360 4343
4361 if (!namelen) { 4344 if (!namelen) {
4362 ret = -EINVAL; 4345 ret = -EINVAL;
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index dcebf0d920fa..c8a044efbb15 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o 3obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
4 4
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 9f30491e5e88..29a886d1e82c 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -128,8 +128,8 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
128 128
129 assert_spin_locked(&res->spinlock); 129 assert_spin_locked(&res->spinlock);
130 130
131 mlog_entry("type=%d, convert_type=%d, new convert_type=%d\n", 131 mlog(0, "type=%d, convert_type=%d, new convert_type=%d\n",
132 lock->ml.type, lock->ml.convert_type, type); 132 lock->ml.type, lock->ml.convert_type, type);
133 133
134 spin_lock(&lock->spinlock); 134 spin_lock(&lock->spinlock);
135 135
@@ -353,7 +353,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
353 struct kvec vec[2]; 353 struct kvec vec[2];
354 size_t veclen = 1; 354 size_t veclen = 1;
355 355
356 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 356 mlog(0, "%.*s\n", res->lockname.len, res->lockname.name);
357 357
358 memset(&convert, 0, sizeof(struct dlm_convert_lock)); 358 memset(&convert, 0, sizeof(struct dlm_convert_lock));
359 convert.node_idx = dlm->node_num; 359 convert.node_idx = dlm->node_num;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7e38a072d720..7540a492eaba 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -188,7 +188,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
188 struct hlist_head *bucket; 188 struct hlist_head *bucket;
189 struct hlist_node *list; 189 struct hlist_node *list;
190 190
191 mlog_entry("%.*s\n", len, name); 191 mlog(0, "%.*s\n", len, name);
192 192
193 assert_spin_locked(&dlm->spinlock); 193 assert_spin_locked(&dlm->spinlock);
194 194
@@ -222,7 +222,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
222{ 222{
223 struct dlm_lock_resource *res = NULL; 223 struct dlm_lock_resource *res = NULL;
224 224
225 mlog_entry("%.*s\n", len, name); 225 mlog(0, "%.*s\n", len, name);
226 226
227 assert_spin_locked(&dlm->spinlock); 227 assert_spin_locked(&dlm->spinlock);
228 228
@@ -531,7 +531,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
531 unsigned int node; 531 unsigned int node;
532 struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; 532 struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf;
533 533
534 mlog_entry("%p %u %p", msg, len, data); 534 mlog(0, "%p %u %p", msg, len, data);
535 535
536 if (!dlm_grab(dlm)) 536 if (!dlm_grab(dlm))
537 return 0; 537 return 0;
@@ -926,9 +926,10 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
926} 926}
927 927
928static int dlm_match_regions(struct dlm_ctxt *dlm, 928static int dlm_match_regions(struct dlm_ctxt *dlm,
929 struct dlm_query_region *qr) 929 struct dlm_query_region *qr,
930 char *local, int locallen)
930{ 931{
931 char *local = NULL, *remote = qr->qr_regions; 932 char *remote = qr->qr_regions;
932 char *l, *r; 933 char *l, *r;
933 int localnr, i, j, foundit; 934 int localnr, i, j, foundit;
934 int status = 0; 935 int status = 0;
@@ -957,13 +958,8 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
957 r += O2HB_MAX_REGION_NAME_LEN; 958 r += O2HB_MAX_REGION_NAME_LEN;
958 } 959 }
959 960
960 local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC); 961 localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN);
961 if (!local) { 962 localnr = o2hb_get_all_regions(local, (u8)localnr);
962 status = -ENOMEM;
963 goto bail;
964 }
965
966 localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
967 963
968 /* compare local regions with remote */ 964 /* compare local regions with remote */
969 l = local; 965 l = local;
@@ -1012,8 +1008,6 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
1012 } 1008 }
1013 1009
1014bail: 1010bail:
1015 kfree(local);
1016
1017 return status; 1011 return status;
1018} 1012}
1019 1013
@@ -1075,6 +1069,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1075{ 1069{
1076 struct dlm_query_region *qr; 1070 struct dlm_query_region *qr;
1077 struct dlm_ctxt *dlm = NULL; 1071 struct dlm_ctxt *dlm = NULL;
1072 char *local = NULL;
1078 int status = 0; 1073 int status = 0;
1079 int locked = 0; 1074 int locked = 0;
1080 1075
@@ -1083,6 +1078,13 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1083 mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, 1078 mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
1084 qr->qr_domain); 1079 qr->qr_domain);
1085 1080
1081 /* buffer used in dlm_mast_regions() */
1082 local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
1083 if (!local) {
1084 status = -ENOMEM;
1085 goto bail;
1086 }
1087
1086 status = -EINVAL; 1088 status = -EINVAL;
1087 1089
1088 spin_lock(&dlm_domain_lock); 1090 spin_lock(&dlm_domain_lock);
@@ -1112,13 +1114,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1112 goto bail; 1114 goto bail;
1113 } 1115 }
1114 1116
1115 status = dlm_match_regions(dlm, qr); 1117 status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
1116 1118
1117bail: 1119bail:
1118 if (locked) 1120 if (locked)
1119 spin_unlock(&dlm->spinlock); 1121 spin_unlock(&dlm->spinlock);
1120 spin_unlock(&dlm_domain_lock); 1122 spin_unlock(&dlm_domain_lock);
1121 1123
1124 kfree(local);
1125
1122 return status; 1126 return status;
1123} 1127}
1124 1128
@@ -1553,7 +1557,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
1553 struct domain_join_ctxt *ctxt; 1557 struct domain_join_ctxt *ctxt;
1554 enum dlm_query_join_response_code response = JOIN_DISALLOW; 1558 enum dlm_query_join_response_code response = JOIN_DISALLOW;
1555 1559
1556 mlog_entry("%p", dlm); 1560 mlog(0, "%p", dlm);
1557 1561
1558 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 1562 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
1559 if (!ctxt) { 1563 if (!ctxt) {
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 7009292aac5a..8d39e0fd66f7 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -128,7 +128,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
128 int call_ast = 0, kick_thread = 0; 128 int call_ast = 0, kick_thread = 0;
129 enum dlm_status status = DLM_NORMAL; 129 enum dlm_status status = DLM_NORMAL;
130 130
131 mlog_entry("type=%d\n", lock->ml.type); 131 mlog(0, "type=%d\n", lock->ml.type);
132 132
133 spin_lock(&res->spinlock); 133 spin_lock(&res->spinlock);
134 /* if called from dlm_create_lock_handler, need to 134 /* if called from dlm_create_lock_handler, need to
@@ -227,8 +227,8 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
227 enum dlm_status status = DLM_DENIED; 227 enum dlm_status status = DLM_DENIED;
228 int lockres_changed = 1; 228 int lockres_changed = 1;
229 229
230 mlog_entry("type=%d\n", lock->ml.type); 230 mlog(0, "type=%d, lockres %.*s, flags = 0x%x\n",
231 mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len, 231 lock->ml.type, res->lockname.len,
232 res->lockname.name, flags); 232 res->lockname.name, flags);
233 233
234 spin_lock(&res->spinlock); 234 spin_lock(&res->spinlock);
@@ -308,8 +308,6 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
308 int tmpret, status = 0; 308 int tmpret, status = 0;
309 enum dlm_status ret; 309 enum dlm_status ret;
310 310
311 mlog_entry_void();
312
313 memset(&create, 0, sizeof(create)); 311 memset(&create, 0, sizeof(create));
314 create.node_idx = dlm->node_num; 312 create.node_idx = dlm->node_num;
315 create.requested_type = lock->ml.type; 313 create.requested_type = lock->ml.type;
@@ -477,8 +475,6 @@ int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
477 475
478 BUG_ON(!dlm); 476 BUG_ON(!dlm);
479 477
480 mlog_entry_void();
481
482 if (!dlm_grab(dlm)) 478 if (!dlm_grab(dlm))
483 return DLM_REJECTED; 479 return DLM_REJECTED;
484 480
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 59f0f6bdfc62..9d67610dfc74 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -426,8 +426,6 @@ static void dlm_mle_release(struct kref *kref)
426 struct dlm_master_list_entry *mle; 426 struct dlm_master_list_entry *mle;
427 struct dlm_ctxt *dlm; 427 struct dlm_ctxt *dlm;
428 428
429 mlog_entry_void();
430
431 mle = container_of(kref, struct dlm_master_list_entry, mle_refs); 429 mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
432 dlm = mle->dlm; 430 dlm = mle->dlm;
433 431
@@ -3120,8 +3118,6 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
3120 3118
3121 *oldmle = NULL; 3119 *oldmle = NULL;
3122 3120
3123 mlog_entry_void();
3124
3125 assert_spin_locked(&dlm->spinlock); 3121 assert_spin_locked(&dlm->spinlock);
3126 assert_spin_locked(&dlm->master_lock); 3122 assert_spin_locked(&dlm->master_lock);
3127 3123
@@ -3261,7 +3257,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
3261 struct hlist_node *list; 3257 struct hlist_node *list;
3262 unsigned int i; 3258 unsigned int i;
3263 3259
3264 mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); 3260 mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node);
3265top: 3261top:
3266 assert_spin_locked(&dlm->spinlock); 3262 assert_spin_locked(&dlm->spinlock);
3267 3263
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index aaaffbcbe916..f1beb6fc254d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -727,7 +727,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
727 if (destroy) 727 if (destroy)
728 dlm_destroy_recovery_area(dlm, dead_node); 728 dlm_destroy_recovery_area(dlm, dead_node);
729 729
730 mlog_exit(status);
731 return status; 730 return status;
732} 731}
733 732
@@ -1496,9 +1495,9 @@ leave:
1496 kfree(buf); 1495 kfree(buf);
1497 if (item) 1496 if (item)
1498 kfree(item); 1497 kfree(item);
1498 mlog_errno(ret);
1499 } 1499 }
1500 1500
1501 mlog_exit(ret);
1502 return ret; 1501 return ret;
1503} 1502}
1504 1503
@@ -1567,7 +1566,6 @@ leave:
1567 dlm_lockres_put(res); 1566 dlm_lockres_put(res);
1568 } 1567 }
1569 kfree(data); 1568 kfree(data);
1570 mlog_exit(ret);
1571} 1569}
1572 1570
1573 1571
@@ -1986,7 +1984,6 @@ leave:
1986 dlm_lock_put(newlock); 1984 dlm_lock_put(newlock);
1987 } 1985 }
1988 1986
1989 mlog_exit(ret);
1990 return ret; 1987 return ret;
1991} 1988}
1992 1989
@@ -2083,8 +2080,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2083 struct hlist_head *bucket; 2080 struct hlist_head *bucket;
2084 struct dlm_lock_resource *res, *next; 2081 struct dlm_lock_resource *res, *next;
2085 2082
2086 mlog_entry_void();
2087
2088 assert_spin_locked(&dlm->spinlock); 2083 assert_spin_locked(&dlm->spinlock);
2089 2084
2090 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { 2085 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
@@ -2607,8 +2602,6 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
2607 int nodenum; 2602 int nodenum;
2608 int status; 2603 int status;
2609 2604
2610 mlog_entry("%u\n", dead_node);
2611
2612 mlog(0, "%s: dead node is %u\n", dlm->name, dead_node); 2605 mlog(0, "%s: dead node is %u\n", dlm->name, dead_node);
2613 2606
2614 spin_lock(&dlm->spinlock); 2607 spin_lock(&dlm->spinlock);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 817287c6a6db..850aa7e87537 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -317,7 +317,7 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
317 struct kvec vec[2]; 317 struct kvec vec[2];
318 size_t veclen = 1; 318 size_t veclen = 1;
319 319
320 mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 320 mlog(0, "%.*s\n", res->lockname.len, res->lockname.name);
321 321
322 if (owner == dlm->node_num) { 322 if (owner == dlm->node_num) {
323 /* ended up trying to contact ourself. this means 323 /* ended up trying to contact ourself. this means
@@ -588,8 +588,6 @@ enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb,
588 struct dlm_lock *lock = NULL; 588 struct dlm_lock *lock = NULL;
589 int call_ast, is_master; 589 int call_ast, is_master;
590 590
591 mlog_entry_void();
592
593 if (!lksb) { 591 if (!lksb) {
594 dlm_error(DLM_BADARGS); 592 dlm_error(DLM_BADARGS);
595 return DLM_BADARGS; 593 return DLM_BADARGS;
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index df69b4856d0d..f14be89a6701 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o 3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
4 4
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e8d94d722ecb..7642d7ca73e5 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -64,7 +64,7 @@ struct ocfs2_mask_waiter {
64 unsigned long mw_mask; 64 unsigned long mw_mask;
65 unsigned long mw_goal; 65 unsigned long mw_goal;
66#ifdef CONFIG_OCFS2_FS_STATS 66#ifdef CONFIG_OCFS2_FS_STATS
67 unsigned long long mw_lock_start; 67 ktime_t mw_lock_start;
68#endif 68#endif
69}; 69};
70 70
@@ -397,8 +397,6 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
397{ 397{
398 int len; 398 int len;
399 399
400 mlog_entry_void();
401
402 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 400 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
403 401
404 len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 402 len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
@@ -408,8 +406,6 @@ static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
408 BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 406 BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
409 407
410 mlog(0, "built lock resource with name: %s\n", name); 408 mlog(0, "built lock resource with name: %s\n", name);
411
412 mlog_exit_void();
413} 409}
414 410
415static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 411static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
@@ -435,44 +431,41 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
435#ifdef CONFIG_OCFS2_FS_STATS 431#ifdef CONFIG_OCFS2_FS_STATS
436static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 432static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
437{ 433{
438 res->l_lock_num_prmode = 0;
439 res->l_lock_num_prmode_failed = 0;
440 res->l_lock_total_prmode = 0;
441 res->l_lock_max_prmode = 0;
442 res->l_lock_num_exmode = 0;
443 res->l_lock_num_exmode_failed = 0;
444 res->l_lock_total_exmode = 0;
445 res->l_lock_max_exmode = 0;
446 res->l_lock_refresh = 0; 434 res->l_lock_refresh = 0;
435 memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
436 memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
447} 437}
448 438
449static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 439static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
450 struct ocfs2_mask_waiter *mw, int ret) 440 struct ocfs2_mask_waiter *mw, int ret)
451{ 441{
452 unsigned long long *num, *sum; 442 u32 usec;
453 unsigned int *max, *failed; 443 ktime_t kt;
454 struct timespec ts = current_kernel_time(); 444 struct ocfs2_lock_stats *stats;
455 unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; 445
456 446 if (level == LKM_PRMODE)
457 if (level == LKM_PRMODE) { 447 stats = &res->l_lock_prmode;
458 num = &res->l_lock_num_prmode; 448 else if (level == LKM_EXMODE)
459 sum = &res->l_lock_total_prmode; 449 stats = &res->l_lock_exmode;
460 max = &res->l_lock_max_prmode; 450 else
461 failed = &res->l_lock_num_prmode_failed;
462 } else if (level == LKM_EXMODE) {
463 num = &res->l_lock_num_exmode;
464 sum = &res->l_lock_total_exmode;
465 max = &res->l_lock_max_exmode;
466 failed = &res->l_lock_num_exmode_failed;
467 } else
468 return; 451 return;
469 452
470 (*num)++; 453 kt = ktime_sub(ktime_get(), mw->mw_lock_start);
471 (*sum) += time; 454 usec = ktime_to_us(kt);
472 if (time > *max) 455
473 *max = time; 456 stats->ls_gets++;
457 stats->ls_total += ktime_to_ns(kt);
458 /* overflow */
459 if (unlikely(stats->ls_gets) == 0) {
460 stats->ls_gets++;
461 stats->ls_total = ktime_to_ns(kt);
462 }
463
464 if (stats->ls_max < usec)
465 stats->ls_max = usec;
466
474 if (ret) 467 if (ret)
475 (*failed)++; 468 stats->ls_fail++;
476} 469}
477 470
478static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 471static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
@@ -482,8 +475,7 @@ static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
482 475
483static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 476static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
484{ 477{
485 struct timespec ts = current_kernel_time(); 478 mw->mw_lock_start = ktime_get();
486 mw->mw_lock_start = timespec_to_ns(&ts);
487} 479}
488#else 480#else
489static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 481static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
@@ -729,8 +721,6 @@ void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
729 721
730void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 722void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
731{ 723{
732 mlog_entry_void();
733
734 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 724 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
735 return; 725 return;
736 726
@@ -756,14 +746,11 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
756 memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 746 memset(&res->l_lksb, 0, sizeof(res->l_lksb));
757 747
758 res->l_flags = 0UL; 748 res->l_flags = 0UL;
759 mlog_exit_void();
760} 749}
761 750
762static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 751static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
763 int level) 752 int level)
764{ 753{
765 mlog_entry_void();
766
767 BUG_ON(!lockres); 754 BUG_ON(!lockres);
768 755
769 switch(level) { 756 switch(level) {
@@ -776,15 +763,11 @@ static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
776 default: 763 default:
777 BUG(); 764 BUG();
778 } 765 }
779
780 mlog_exit_void();
781} 766}
782 767
783static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 768static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
784 int level) 769 int level)
785{ 770{
786 mlog_entry_void();
787
788 BUG_ON(!lockres); 771 BUG_ON(!lockres);
789 772
790 switch(level) { 773 switch(level) {
@@ -799,7 +782,6 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
799 default: 782 default:
800 BUG(); 783 BUG();
801 } 784 }
802 mlog_exit_void();
803} 785}
804 786
805/* WARNING: This function lives in a world where the only three lock 787/* WARNING: This function lives in a world where the only three lock
@@ -846,8 +828,6 @@ static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
846 828
847static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 829static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
848{ 830{
849 mlog_entry_void();
850
851 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 831 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
852 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 832 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
853 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 833 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
@@ -860,14 +840,10 @@ static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res
860 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 840 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
861 } 841 }
862 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 842 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
863
864 mlog_exit_void();
865} 843}
866 844
867static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 845static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
868{ 846{
869 mlog_entry_void();
870
871 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 847 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
872 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 848 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
873 849
@@ -889,14 +865,10 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
889 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 865 lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
890 866
891 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 867 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
892
893 mlog_exit_void();
894} 868}
895 869
896static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 870static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
897{ 871{
898 mlog_entry_void();
899
900 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 872 BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
901 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 873 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
902 874
@@ -908,15 +880,12 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
908 lockres->l_level = lockres->l_requested; 880 lockres->l_level = lockres->l_requested;
909 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 881 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
910 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 882 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
911
912 mlog_exit_void();
913} 883}
914 884
915static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 885static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
916 int level) 886 int level)
917{ 887{
918 int needs_downconvert = 0; 888 int needs_downconvert = 0;
919 mlog_entry_void();
920 889
921 assert_spin_locked(&lockres->l_lock); 890 assert_spin_locked(&lockres->l_lock);
922 891
@@ -938,8 +907,7 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
938 907
939 if (needs_downconvert) 908 if (needs_downconvert)
940 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 909 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
941 910 mlog(0, "needs_downconvert = %d\n", needs_downconvert);
942 mlog_exit(needs_downconvert);
943 return needs_downconvert; 911 return needs_downconvert;
944} 912}
945 913
@@ -1151,8 +1119,6 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1151 struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1119 struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1152 unsigned long flags; 1120 unsigned long flags;
1153 1121
1154 mlog_entry_void();
1155
1156 mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", 1122 mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
1157 lockres->l_name, lockres->l_unlock_action); 1123 lockres->l_name, lockres->l_unlock_action);
1158 1124
@@ -1162,7 +1128,6 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1162 "unlock_action %d\n", error, lockres->l_name, 1128 "unlock_action %d\n", error, lockres->l_name,
1163 lockres->l_unlock_action); 1129 lockres->l_unlock_action);
1164 spin_unlock_irqrestore(&lockres->l_lock, flags); 1130 spin_unlock_irqrestore(&lockres->l_lock, flags);
1165 mlog_exit_void();
1166 return; 1131 return;
1167 } 1132 }
1168 1133
@@ -1186,8 +1151,6 @@ static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1186 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1151 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1187 wake_up(&lockres->l_event); 1152 wake_up(&lockres->l_event);
1188 spin_unlock_irqrestore(&lockres->l_lock, flags); 1153 spin_unlock_irqrestore(&lockres->l_lock, flags);
1189
1190 mlog_exit_void();
1191} 1154}
1192 1155
1193/* 1156/*
@@ -1233,7 +1196,6 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1233{ 1196{
1234 unsigned long flags; 1197 unsigned long flags;
1235 1198
1236 mlog_entry_void();
1237 spin_lock_irqsave(&lockres->l_lock, flags); 1199 spin_lock_irqsave(&lockres->l_lock, flags);
1238 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1200 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1239 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1201 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
@@ -1244,7 +1206,6 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1244 spin_unlock_irqrestore(&lockres->l_lock, flags); 1206 spin_unlock_irqrestore(&lockres->l_lock, flags);
1245 1207
1246 wake_up(&lockres->l_event); 1208 wake_up(&lockres->l_event);
1247 mlog_exit_void();
1248} 1209}
1249 1210
1250/* Note: If we detect another process working on the lock (i.e., 1211/* Note: If we detect another process working on the lock (i.e.,
@@ -1260,8 +1221,6 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
1260 unsigned long flags; 1221 unsigned long flags;
1261 unsigned int gen; 1222 unsigned int gen;
1262 1223
1263 mlog_entry_void();
1264
1265 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1224 mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1266 dlm_flags); 1225 dlm_flags);
1267 1226
@@ -1293,7 +1252,6 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
1293 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1252 mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1294 1253
1295bail: 1254bail:
1296 mlog_exit(ret);
1297 return ret; 1255 return ret;
1298} 1256}
1299 1257
@@ -1416,8 +1374,6 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1416 unsigned int gen; 1374 unsigned int gen;
1417 int noqueue_attempted = 0; 1375 int noqueue_attempted = 0;
1418 1376
1419 mlog_entry_void();
1420
1421 ocfs2_init_mask_waiter(&mw); 1377 ocfs2_init_mask_waiter(&mw);
1422 1378
1423 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1379 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
@@ -1583,7 +1539,6 @@ out:
1583 caller_ip); 1539 caller_ip);
1584 } 1540 }
1585#endif 1541#endif
1586 mlog_exit(ret);
1587 return ret; 1542 return ret;
1588} 1543}
1589 1544
@@ -1605,7 +1560,6 @@ static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1605{ 1560{
1606 unsigned long flags; 1561 unsigned long flags;
1607 1562
1608 mlog_entry_void();
1609 spin_lock_irqsave(&lockres->l_lock, flags); 1563 spin_lock_irqsave(&lockres->l_lock, flags);
1610 ocfs2_dec_holders(lockres, level); 1564 ocfs2_dec_holders(lockres, level);
1611 ocfs2_downconvert_on_unlock(osb, lockres); 1565 ocfs2_downconvert_on_unlock(osb, lockres);
@@ -1614,7 +1568,6 @@ static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1614 if (lockres->l_lockdep_map.key != NULL) 1568 if (lockres->l_lockdep_map.key != NULL)
1615 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1569 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1616#endif 1570#endif
1617 mlog_exit_void();
1618} 1571}
1619 1572
1620static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1573static int ocfs2_create_new_lock(struct ocfs2_super *osb,
@@ -1648,8 +1601,6 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1648 BUG_ON(!inode); 1601 BUG_ON(!inode);
1649 BUG_ON(!ocfs2_inode_is_new(inode)); 1602 BUG_ON(!ocfs2_inode_is_new(inode));
1650 1603
1651 mlog_entry_void();
1652
1653 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1604 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1654 1605
1655 /* NOTE: That we don't increment any of the holder counts, nor 1606 /* NOTE: That we don't increment any of the holder counts, nor
@@ -1683,7 +1634,6 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1683 } 1634 }
1684 1635
1685bail: 1636bail:
1686 mlog_exit(ret);
1687 return ret; 1637 return ret;
1688} 1638}
1689 1639
@@ -1695,16 +1645,12 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1695 1645
1696 BUG_ON(!inode); 1646 BUG_ON(!inode);
1697 1647
1698 mlog_entry_void();
1699
1700 mlog(0, "inode %llu take %s RW lock\n", 1648 mlog(0, "inode %llu take %s RW lock\n",
1701 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1649 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1702 write ? "EXMODE" : "PRMODE"); 1650 write ? "EXMODE" : "PRMODE");
1703 1651
1704 if (ocfs2_mount_local(osb)) { 1652 if (ocfs2_mount_local(osb))
1705 mlog_exit(0);
1706 return 0; 1653 return 0;
1707 }
1708 1654
1709 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1655 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1710 1656
@@ -1715,7 +1661,6 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1715 if (status < 0) 1661 if (status < 0)
1716 mlog_errno(status); 1662 mlog_errno(status);
1717 1663
1718 mlog_exit(status);
1719 return status; 1664 return status;
1720} 1665}
1721 1666
@@ -1725,16 +1670,12 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
1725 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1670 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1726 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1671 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1727 1672
1728 mlog_entry_void();
1729
1730 mlog(0, "inode %llu drop %s RW lock\n", 1673 mlog(0, "inode %llu drop %s RW lock\n",
1731 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1674 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1732 write ? "EXMODE" : "PRMODE"); 1675 write ? "EXMODE" : "PRMODE");
1733 1676
1734 if (!ocfs2_mount_local(osb)) 1677 if (!ocfs2_mount_local(osb))
1735 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1678 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1736
1737 mlog_exit_void();
1738} 1679}
1739 1680
1740/* 1681/*
@@ -1748,8 +1689,6 @@ int ocfs2_open_lock(struct inode *inode)
1748 1689
1749 BUG_ON(!inode); 1690 BUG_ON(!inode);
1750 1691
1751 mlog_entry_void();
1752
1753 mlog(0, "inode %llu take PRMODE open lock\n", 1692 mlog(0, "inode %llu take PRMODE open lock\n",
1754 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1693 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1755 1694
@@ -1764,7 +1703,6 @@ int ocfs2_open_lock(struct inode *inode)
1764 mlog_errno(status); 1703 mlog_errno(status);
1765 1704
1766out: 1705out:
1767 mlog_exit(status);
1768 return status; 1706 return status;
1769} 1707}
1770 1708
@@ -1776,8 +1714,6 @@ int ocfs2_try_open_lock(struct inode *inode, int write)
1776 1714
1777 BUG_ON(!inode); 1715 BUG_ON(!inode);
1778 1716
1779 mlog_entry_void();
1780
1781 mlog(0, "inode %llu try to take %s open lock\n", 1717 mlog(0, "inode %llu try to take %s open lock\n",
1782 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1718 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1783 write ? "EXMODE" : "PRMODE"); 1719 write ? "EXMODE" : "PRMODE");
@@ -1799,7 +1735,6 @@ int ocfs2_try_open_lock(struct inode *inode, int write)
1799 level, DLM_LKF_NOQUEUE, 0); 1735 level, DLM_LKF_NOQUEUE, 0);
1800 1736
1801out: 1737out:
1802 mlog_exit(status);
1803 return status; 1738 return status;
1804} 1739}
1805 1740
@@ -1811,8 +1746,6 @@ void ocfs2_open_unlock(struct inode *inode)
1811 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 1746 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1812 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1747 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1813 1748
1814 mlog_entry_void();
1815
1816 mlog(0, "inode %llu drop open lock\n", 1749 mlog(0, "inode %llu drop open lock\n",
1817 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1750 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1818 1751
@@ -1827,7 +1760,7 @@ void ocfs2_open_unlock(struct inode *inode)
1827 DLM_LOCK_EX); 1760 DLM_LOCK_EX);
1828 1761
1829out: 1762out:
1830 mlog_exit_void(); 1763 return;
1831} 1764}
1832 1765
1833static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1766static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
@@ -2043,8 +1976,6 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2043{ 1976{
2044 int kick = 0; 1977 int kick = 0;
2045 1978
2046 mlog_entry_void();
2047
2048 /* If we know that another node is waiting on our lock, kick 1979 /* If we know that another node is waiting on our lock, kick
2049 * the downconvert thread * pre-emptively when we reach a release 1980 * the downconvert thread * pre-emptively when we reach a release
2050 * condition. */ 1981 * condition. */
@@ -2065,8 +1996,6 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2065 1996
2066 if (kick) 1997 if (kick)
2067 ocfs2_wake_downconvert_thread(osb); 1998 ocfs2_wake_downconvert_thread(osb);
2068
2069 mlog_exit_void();
2070} 1999}
2071 2000
2072#define OCFS2_SEC_BITS 34 2001#define OCFS2_SEC_BITS 34
@@ -2095,8 +2024,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2095 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2024 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2096 struct ocfs2_meta_lvb *lvb; 2025 struct ocfs2_meta_lvb *lvb;
2097 2026
2098 mlog_entry_void();
2099
2100 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2027 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2101 2028
2102 /* 2029 /*
@@ -2128,8 +2055,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2128 2055
2129out: 2056out:
2130 mlog_meta_lvb(0, lockres); 2057 mlog_meta_lvb(0, lockres);
2131
2132 mlog_exit_void();
2133} 2058}
2134 2059
2135static void ocfs2_unpack_timespec(struct timespec *spec, 2060static void ocfs2_unpack_timespec(struct timespec *spec,
@@ -2145,8 +2070,6 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2145 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2070 struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2146 struct ocfs2_meta_lvb *lvb; 2071 struct ocfs2_meta_lvb *lvb;
2147 2072
2148 mlog_entry_void();
2149
2150 mlog_meta_lvb(0, lockres); 2073 mlog_meta_lvb(0, lockres);
2151 2074
2152 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2075 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
@@ -2177,8 +2100,6 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2177 ocfs2_unpack_timespec(&inode->i_ctime, 2100 ocfs2_unpack_timespec(&inode->i_ctime,
2178 be64_to_cpu(lvb->lvb_ictime_packed)); 2101 be64_to_cpu(lvb->lvb_ictime_packed));
2179 spin_unlock(&oi->ip_lock); 2102 spin_unlock(&oi->ip_lock);
2180
2181 mlog_exit_void();
2182} 2103}
2183 2104
2184static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2105static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
@@ -2205,8 +2126,6 @@ static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2205 unsigned long flags; 2126 unsigned long flags;
2206 int status = 0; 2127 int status = 0;
2207 2128
2208 mlog_entry_void();
2209
2210refresh_check: 2129refresh_check:
2211 spin_lock_irqsave(&lockres->l_lock, flags); 2130 spin_lock_irqsave(&lockres->l_lock, flags);
2212 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2131 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
@@ -2227,7 +2146,7 @@ refresh_check:
2227 2146
2228 status = 1; 2147 status = 1;
2229bail: 2148bail:
2230 mlog_exit(status); 2149 mlog(0, "status %d\n", status);
2231 return status; 2150 return status;
2232} 2151}
2233 2152
@@ -2237,7 +2156,6 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre
2237 int status) 2156 int status)
2238{ 2157{
2239 unsigned long flags; 2158 unsigned long flags;
2240 mlog_entry_void();
2241 2159
2242 spin_lock_irqsave(&lockres->l_lock, flags); 2160 spin_lock_irqsave(&lockres->l_lock, flags);
2243 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2161 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
@@ -2246,8 +2164,6 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre
2246 spin_unlock_irqrestore(&lockres->l_lock, flags); 2164 spin_unlock_irqrestore(&lockres->l_lock, flags);
2247 2165
2248 wake_up(&lockres->l_event); 2166 wake_up(&lockres->l_event);
2249
2250 mlog_exit_void();
2251} 2167}
2252 2168
2253/* may or may not return a bh if it went to disk. */ 2169/* may or may not return a bh if it went to disk. */
@@ -2260,8 +2176,6 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2260 struct ocfs2_dinode *fe; 2176 struct ocfs2_dinode *fe;
2261 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2177 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2262 2178
2263 mlog_entry_void();
2264
2265 if (ocfs2_mount_local(osb)) 2179 if (ocfs2_mount_local(osb))
2266 goto bail; 2180 goto bail;
2267 2181
@@ -2330,7 +2244,6 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2330bail_refresh: 2244bail_refresh:
2331 ocfs2_complete_lock_res_refresh(lockres, status); 2245 ocfs2_complete_lock_res_refresh(lockres, status);
2332bail: 2246bail:
2333 mlog_exit(status);
2334 return status; 2247 return status;
2335} 2248}
2336 2249
@@ -2374,8 +2287,6 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
2374 2287
2375 BUG_ON(!inode); 2288 BUG_ON(!inode);
2376 2289
2377 mlog_entry_void();
2378
2379 mlog(0, "inode %llu, take %s META lock\n", 2290 mlog(0, "inode %llu, take %s META lock\n",
2380 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2291 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2381 ex ? "EXMODE" : "PRMODE"); 2292 ex ? "EXMODE" : "PRMODE");
@@ -2467,7 +2378,6 @@ bail:
2467 if (local_bh) 2378 if (local_bh)
2468 brelse(local_bh); 2379 brelse(local_bh);
2469 2380
2470 mlog_exit(status);
2471 return status; 2381 return status;
2472} 2382}
2473 2383
@@ -2517,7 +2427,6 @@ int ocfs2_inode_lock_atime(struct inode *inode,
2517{ 2427{
2518 int ret; 2428 int ret;
2519 2429
2520 mlog_entry_void();
2521 ret = ocfs2_inode_lock(inode, NULL, 0); 2430 ret = ocfs2_inode_lock(inode, NULL, 0);
2522 if (ret < 0) { 2431 if (ret < 0) {
2523 mlog_errno(ret); 2432 mlog_errno(ret);
@@ -2545,7 +2454,6 @@ int ocfs2_inode_lock_atime(struct inode *inode,
2545 } else 2454 } else
2546 *level = 0; 2455 *level = 0;
2547 2456
2548 mlog_exit(ret);
2549 return ret; 2457 return ret;
2550} 2458}
2551 2459
@@ -2556,8 +2464,6 @@ void ocfs2_inode_unlock(struct inode *inode,
2556 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2464 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2557 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2465 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2558 2466
2559 mlog_entry_void();
2560
2561 mlog(0, "inode %llu drop %s META lock\n", 2467 mlog(0, "inode %llu drop %s META lock\n",
2562 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2468 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2563 ex ? "EXMODE" : "PRMODE"); 2469 ex ? "EXMODE" : "PRMODE");
@@ -2565,8 +2471,6 @@ void ocfs2_inode_unlock(struct inode *inode,
2565 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2471 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2566 !ocfs2_mount_local(osb)) 2472 !ocfs2_mount_local(osb))
2567 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2473 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2568
2569 mlog_exit_void();
2570} 2474}
2571 2475
2572int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 2476int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
@@ -2617,8 +2521,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2617 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2521 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2618 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2522 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2619 2523
2620 mlog_entry_void();
2621
2622 if (ocfs2_is_hard_readonly(osb)) 2524 if (ocfs2_is_hard_readonly(osb))
2623 return -EROFS; 2525 return -EROFS;
2624 2526
@@ -2650,7 +2552,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2650 ocfs2_track_lock_refresh(lockres); 2552 ocfs2_track_lock_refresh(lockres);
2651 } 2553 }
2652bail: 2554bail:
2653 mlog_exit(status);
2654 return status; 2555 return status;
2655} 2556}
2656 2557
@@ -2869,8 +2770,15 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2869 return iter; 2770 return iter;
2870} 2771}
2871 2772
2872/* So that debugfs.ocfs2 can determine which format is being used */ 2773/*
2873#define OCFS2_DLM_DEBUG_STR_VERSION 2 2774 * Version is used by debugfs.ocfs2 to determine the format being used
2775 *
2776 * New in version 2
2777 * - Lock stats printed
2778 * New in version 3
2779 * - Max time in lock stats is in usecs (instead of nsecs)
2780 */
2781#define OCFS2_DLM_DEBUG_STR_VERSION 3
2874static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2782static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2875{ 2783{
2876 int i; 2784 int i;
@@ -2912,18 +2820,18 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2912 seq_printf(m, "0x%x\t", lvb[i]); 2820 seq_printf(m, "0x%x\t", lvb[i]);
2913 2821
2914#ifdef CONFIG_OCFS2_FS_STATS 2822#ifdef CONFIG_OCFS2_FS_STATS
2915# define lock_num_prmode(_l) (_l)->l_lock_num_prmode 2823# define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets)
2916# define lock_num_exmode(_l) (_l)->l_lock_num_exmode 2824# define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets)
2917# define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed 2825# define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail)
2918# define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed 2826# define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail)
2919# define lock_total_prmode(_l) (_l)->l_lock_total_prmode 2827# define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total)
2920# define lock_total_exmode(_l) (_l)->l_lock_total_exmode 2828# define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total)
2921# define lock_max_prmode(_l) (_l)->l_lock_max_prmode 2829# define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max)
2922# define lock_max_exmode(_l) (_l)->l_lock_max_exmode 2830# define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max)
2923# define lock_refresh(_l) (_l)->l_lock_refresh 2831# define lock_refresh(_l) ((_l)->l_lock_refresh)
2924#else 2832#else
2925# define lock_num_prmode(_l) (0ULL) 2833# define lock_num_prmode(_l) (0)
2926# define lock_num_exmode(_l) (0ULL) 2834# define lock_num_exmode(_l) (0)
2927# define lock_num_prmode_failed(_l) (0) 2835# define lock_num_prmode_failed(_l) (0)
2928# define lock_num_exmode_failed(_l) (0) 2836# define lock_num_exmode_failed(_l) (0)
2929# define lock_total_prmode(_l) (0ULL) 2837# define lock_total_prmode(_l) (0ULL)
@@ -2933,8 +2841,8 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2933# define lock_refresh(_l) (0) 2841# define lock_refresh(_l) (0)
2934#endif 2842#endif
2935 /* The following seq_print was added in version 2 of this output */ 2843 /* The following seq_print was added in version 2 of this output */
2936 seq_printf(m, "%llu\t" 2844 seq_printf(m, "%u\t"
2937 "%llu\t" 2845 "%u\t"
2938 "%u\t" 2846 "%u\t"
2939 "%u\t" 2847 "%u\t"
2940 "%llu\t" 2848 "%llu\t"
@@ -3054,8 +2962,6 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
3054 int status = 0; 2962 int status = 0;
3055 struct ocfs2_cluster_connection *conn = NULL; 2963 struct ocfs2_cluster_connection *conn = NULL;
3056 2964
3057 mlog_entry_void();
3058
3059 if (ocfs2_mount_local(osb)) { 2965 if (ocfs2_mount_local(osb)) {
3060 osb->node_num = 0; 2966 osb->node_num = 0;
3061 goto local; 2967 goto local;
@@ -3112,15 +3018,12 @@ bail:
3112 kthread_stop(osb->dc_task); 3018 kthread_stop(osb->dc_task);
3113 } 3019 }
3114 3020
3115 mlog_exit(status);
3116 return status; 3021 return status;
3117} 3022}
3118 3023
3119void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 3024void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3120 int hangup_pending) 3025 int hangup_pending)
3121{ 3026{
3122 mlog_entry_void();
3123
3124 ocfs2_drop_osb_locks(osb); 3027 ocfs2_drop_osb_locks(osb);
3125 3028
3126 /* 3029 /*
@@ -3143,8 +3046,6 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3143 osb->cconn = NULL; 3046 osb->cconn = NULL;
3144 3047
3145 ocfs2_dlm_shutdown_debug(osb); 3048 ocfs2_dlm_shutdown_debug(osb);
3146
3147 mlog_exit_void();
3148} 3049}
3149 3050
3150static int ocfs2_drop_lock(struct ocfs2_super *osb, 3051static int ocfs2_drop_lock(struct ocfs2_super *osb,
@@ -3226,7 +3127,6 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
3226 3127
3227 ocfs2_wait_on_busy_lock(lockres); 3128 ocfs2_wait_on_busy_lock(lockres);
3228out: 3129out:
3229 mlog_exit(0);
3230 return 0; 3130 return 0;
3231} 3131}
3232 3132
@@ -3284,8 +3184,6 @@ int ocfs2_drop_inode_locks(struct inode *inode)
3284{ 3184{
3285 int status, err; 3185 int status, err;
3286 3186
3287 mlog_entry_void();
3288
3289 /* No need to call ocfs2_mark_lockres_freeing here - 3187 /* No need to call ocfs2_mark_lockres_freeing here -
3290 * ocfs2_clear_inode has done it for us. */ 3188 * ocfs2_clear_inode has done it for us. */
3291 3189
@@ -3310,7 +3208,6 @@ int ocfs2_drop_inode_locks(struct inode *inode)
3310 if (err < 0 && !status) 3208 if (err < 0 && !status)
3311 status = err; 3209 status = err;
3312 3210
3313 mlog_exit(status);
3314 return status; 3211 return status;
3315} 3212}
3316 3213
@@ -3352,8 +3249,6 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3352 int ret; 3249 int ret;
3353 u32 dlm_flags = DLM_LKF_CONVERT; 3250 u32 dlm_flags = DLM_LKF_CONVERT;
3354 3251
3355 mlog_entry_void();
3356
3357 mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, 3252 mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
3358 lockres->l_level, new_level); 3253 lockres->l_level, new_level);
3359 3254
@@ -3375,7 +3270,6 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3375 3270
3376 ret = 0; 3271 ret = 0;
3377bail: 3272bail:
3378 mlog_exit(ret);
3379 return ret; 3273 return ret;
3380} 3274}
3381 3275
@@ -3385,8 +3279,6 @@ static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3385{ 3279{
3386 assert_spin_locked(&lockres->l_lock); 3280 assert_spin_locked(&lockres->l_lock);
3387 3281
3388 mlog_entry_void();
3389
3390 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3282 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3391 /* If we're already trying to cancel a lock conversion 3283 /* If we're already trying to cancel a lock conversion
3392 * then just drop the spinlock and allow the caller to 3284 * then just drop the spinlock and allow the caller to
@@ -3416,8 +3308,6 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3416{ 3308{
3417 int ret; 3309 int ret;
3418 3310
3419 mlog_entry_void();
3420
3421 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3311 ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3422 DLM_LKF_CANCEL); 3312 DLM_LKF_CANCEL);
3423 if (ret) { 3313 if (ret) {
@@ -3427,7 +3317,6 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3427 3317
3428 mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 3318 mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3429 3319
3430 mlog_exit(ret);
3431 return ret; 3320 return ret;
3432} 3321}
3433 3322
@@ -3443,8 +3332,6 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3443 int set_lvb = 0; 3332 int set_lvb = 0;
3444 unsigned int gen; 3333 unsigned int gen;
3445 3334
3446 mlog_entry_void();
3447
3448 spin_lock_irqsave(&lockres->l_lock, flags); 3335 spin_lock_irqsave(&lockres->l_lock, flags);
3449 3336
3450recheck: 3337recheck:
@@ -3619,14 +3506,14 @@ downconvert:
3619 gen); 3506 gen);
3620 3507
3621leave: 3508leave:
3622 mlog_exit(ret); 3509 if (ret)
3510 mlog_errno(ret);
3623 return ret; 3511 return ret;
3624 3512
3625leave_requeue: 3513leave_requeue:
3626 spin_unlock_irqrestore(&lockres->l_lock, flags); 3514 spin_unlock_irqrestore(&lockres->l_lock, flags);
3627 ctl->requeue = 1; 3515 ctl->requeue = 1;
3628 3516
3629 mlog_exit(0);
3630 return 0; 3517 return 0;
3631} 3518}
3632 3519
@@ -3859,8 +3746,6 @@ static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3859 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 3746 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3860 oinfo->dqi_gi.dqi_type); 3747 oinfo->dqi_gi.dqi_type);
3861 3748
3862 mlog_entry_void();
3863
3864 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 3749 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3865 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 3750 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
3866 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 3751 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
@@ -3869,8 +3754,6 @@ static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3869 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 3754 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
3870 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 3755 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
3871 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 3756 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
3872
3873 mlog_exit_void();
3874} 3757}
3875 3758
3876void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 3759void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
@@ -3879,10 +3762,8 @@ void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3879 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 3762 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3880 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3763 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3881 3764
3882 mlog_entry_void();
3883 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 3765 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3884 ocfs2_cluster_unlock(osb, lockres, level); 3766 ocfs2_cluster_unlock(osb, lockres, level);
3885 mlog_exit_void();
3886} 3767}
3887 3768
3888static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 3769static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
@@ -3937,8 +3818,6 @@ int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3937 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 3818 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3938 int status = 0; 3819 int status = 0;
3939 3820
3940 mlog_entry_void();
3941
3942 /* On RO devices, locking really isn't needed... */ 3821 /* On RO devices, locking really isn't needed... */
3943 if (ocfs2_is_hard_readonly(osb)) { 3822 if (ocfs2_is_hard_readonly(osb)) {
3944 if (ex) 3823 if (ex)
@@ -3961,7 +3840,6 @@ int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3961 ocfs2_qinfo_unlock(oinfo, ex); 3840 ocfs2_qinfo_unlock(oinfo, ex);
3962 ocfs2_complete_lock_res_refresh(lockres, status); 3841 ocfs2_complete_lock_res_refresh(lockres, status);
3963bail: 3842bail:
3964 mlog_exit(status);
3965 return status; 3843 return status;
3966} 3844}
3967 3845
@@ -4007,8 +3885,6 @@ static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
4007 * considered valid until we remove the OCFS2_LOCK_QUEUED 3885 * considered valid until we remove the OCFS2_LOCK_QUEUED
4008 * flag. */ 3886 * flag. */
4009 3887
4010 mlog_entry_void();
4011
4012 BUG_ON(!lockres); 3888 BUG_ON(!lockres);
4013 BUG_ON(!lockres->l_ops); 3889 BUG_ON(!lockres->l_ops);
4014 3890
@@ -4042,15 +3918,11 @@ unqueue:
4042 if (ctl.unblock_action != UNBLOCK_CONTINUE 3918 if (ctl.unblock_action != UNBLOCK_CONTINUE
4043 && lockres->l_ops->post_unlock) 3919 && lockres->l_ops->post_unlock)
4044 lockres->l_ops->post_unlock(osb, lockres); 3920 lockres->l_ops->post_unlock(osb, lockres);
4045
4046 mlog_exit_void();
4047} 3921}
4048 3922
4049static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3923static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
4050 struct ocfs2_lock_res *lockres) 3924 struct ocfs2_lock_res *lockres)
4051{ 3925{
4052 mlog_entry_void();
4053
4054 assert_spin_locked(&lockres->l_lock); 3926 assert_spin_locked(&lockres->l_lock);
4055 3927
4056 if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3928 if (lockres->l_flags & OCFS2_LOCK_FREEING) {
@@ -4071,8 +3943,6 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
4071 osb->blocked_lock_count++; 3943 osb->blocked_lock_count++;
4072 } 3944 }
4073 spin_unlock(&osb->dc_task_lock); 3945 spin_unlock(&osb->dc_task_lock);
4074
4075 mlog_exit_void();
4076} 3946}
4077 3947
4078static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 3948static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
@@ -4080,8 +3950,6 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
4080 unsigned long processed; 3950 unsigned long processed;
4081 struct ocfs2_lock_res *lockres; 3951 struct ocfs2_lock_res *lockres;
4082 3952
4083 mlog_entry_void();
4084
4085 spin_lock(&osb->dc_task_lock); 3953 spin_lock(&osb->dc_task_lock);
4086 /* grab this early so we know to try again if a state change and 3954 /* grab this early so we know to try again if a state change and
4087 * wake happens part-way through our work */ 3955 * wake happens part-way through our work */
@@ -4105,8 +3973,6 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
4105 spin_lock(&osb->dc_task_lock); 3973 spin_lock(&osb->dc_task_lock);
4106 } 3974 }
4107 spin_unlock(&osb->dc_task_lock); 3975 spin_unlock(&osb->dc_task_lock);
4108
4109 mlog_exit_void();
4110} 3976}
4111 3977
4112static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 3978static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 254652a9b542..745db42528d5 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -26,7 +26,6 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/types.h> 27#include <linux/types.h>
28 28
29#define MLOG_MASK_PREFIX ML_EXPORT
30#include <cluster/masklog.h> 29#include <cluster/masklog.h>
31 30
32#include "ocfs2.h" 31#include "ocfs2.h"
@@ -40,6 +39,7 @@
40 39
41#include "buffer_head_io.h" 40#include "buffer_head_io.h"
42#include "suballoc.h" 41#include "suballoc.h"
42#include "ocfs2_trace.h"
43 43
44struct ocfs2_inode_handle 44struct ocfs2_inode_handle
45{ 45{
@@ -56,10 +56,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
56 int status, set; 56 int status, set;
57 struct dentry *result; 57 struct dentry *result;
58 58
59 mlog_entry("(0x%p, 0x%p)\n", sb, handle); 59 trace_ocfs2_get_dentry_begin(sb, handle, (unsigned long long)blkno);
60 60
61 if (blkno == 0) { 61 if (blkno == 0) {
62 mlog(0, "nfs wants inode with blkno: 0\n");
63 result = ERR_PTR(-ESTALE); 62 result = ERR_PTR(-ESTALE);
64 goto bail; 63 goto bail;
65 } 64 }
@@ -83,6 +82,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
83 } 82 }
84 83
85 status = ocfs2_test_inode_bit(osb, blkno, &set); 84 status = ocfs2_test_inode_bit(osb, blkno, &set);
85 trace_ocfs2_get_dentry_test_bit(status, set);
86 if (status < 0) { 86 if (status < 0) {
87 if (status == -EINVAL) { 87 if (status == -EINVAL) {
88 /* 88 /*
@@ -90,18 +90,14 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
90 * as an inode, we return -ESTALE to be 90 * as an inode, we return -ESTALE to be
91 * nice 91 * nice
92 */ 92 */
93 mlog(0, "test inode bit failed %d\n", status);
94 status = -ESTALE; 93 status = -ESTALE;
95 } else { 94 } else
96 mlog(ML_ERROR, "test inode bit failed %d\n", status); 95 mlog(ML_ERROR, "test inode bit failed %d\n", status);
97 }
98 goto unlock_nfs_sync; 96 goto unlock_nfs_sync;
99 } 97 }
100 98
101 /* If the inode allocator bit is clear, this inode must be stale */ 99 /* If the inode allocator bit is clear, this inode must be stale */
102 if (!set) { 100 if (!set) {
103 mlog(0, "inode %llu suballoc bit is clear\n",
104 (unsigned long long)blkno);
105 status = -ESTALE; 101 status = -ESTALE;
106 goto unlock_nfs_sync; 102 goto unlock_nfs_sync;
107 } 103 }
@@ -114,8 +110,8 @@ unlock_nfs_sync:
114check_err: 110check_err:
115 if (status < 0) { 111 if (status < 0) {
116 if (status == -ESTALE) { 112 if (status == -ESTALE) {
117 mlog(0, "stale inode ino: %llu generation: %u\n", 113 trace_ocfs2_get_dentry_stale((unsigned long long)blkno,
118 (unsigned long long)blkno, handle->ih_generation); 114 handle->ih_generation);
119 } 115 }
120 result = ERR_PTR(status); 116 result = ERR_PTR(status);
121 goto bail; 117 goto bail;
@@ -130,8 +126,9 @@ check_err:
130check_gen: 126check_gen:
131 if (handle->ih_generation != inode->i_generation) { 127 if (handle->ih_generation != inode->i_generation) {
132 iput(inode); 128 iput(inode);
133 mlog(0, "stale inode ino: %llu generation: %u\n", 129 trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
134 (unsigned long long)blkno, handle->ih_generation); 130 handle->ih_generation,
131 inode->i_generation);
135 result = ERR_PTR(-ESTALE); 132 result = ERR_PTR(-ESTALE);
136 goto bail; 133 goto bail;
137 } 134 }
@@ -141,7 +138,7 @@ check_gen:
141 mlog_errno(PTR_ERR(result)); 138 mlog_errno(PTR_ERR(result));
142 139
143bail: 140bail:
144 mlog_exit_ptr(result); 141 trace_ocfs2_get_dentry_end(result);
145 return result; 142 return result;
146} 143}
147 144
@@ -152,11 +149,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
152 struct dentry *parent; 149 struct dentry *parent;
153 struct inode *dir = child->d_inode; 150 struct inode *dir = child->d_inode;
154 151
155 mlog_entry("(0x%p, '%.*s')\n", child, 152 trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
156 child->d_name.len, child->d_name.name); 153 (unsigned long long)OCFS2_I(dir)->ip_blkno);
157
158 mlog(0, "find parent of directory %llu\n",
159 (unsigned long long)OCFS2_I(dir)->ip_blkno);
160 154
161 status = ocfs2_inode_lock(dir, NULL, 0); 155 status = ocfs2_inode_lock(dir, NULL, 0);
162 if (status < 0) { 156 if (status < 0) {
@@ -178,7 +172,7 @@ bail_unlock:
178 ocfs2_inode_unlock(dir, 0); 172 ocfs2_inode_unlock(dir, 0);
179 173
180bail: 174bail:
181 mlog_exit_ptr(parent); 175 trace_ocfs2_get_parent_end(parent);
182 176
183 return parent; 177 return parent;
184} 178}
@@ -193,9 +187,9 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
193 u32 generation; 187 u32 generation;
194 __le32 *fh = (__force __le32 *) fh_in; 188 __le32 *fh = (__force __le32 *) fh_in;
195 189
196 mlog_entry("(0x%p, '%.*s', 0x%p, %d, %d)\n", dentry, 190 trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len,
197 dentry->d_name.len, dentry->d_name.name, 191 dentry->d_name.name,
198 fh, len, connectable); 192 fh, len, connectable);
199 193
200 if (connectable && (len < 6)) { 194 if (connectable && (len < 6)) {
201 *max_len = 6; 195 *max_len = 6;
@@ -210,8 +204,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
210 blkno = OCFS2_I(inode)->ip_blkno; 204 blkno = OCFS2_I(inode)->ip_blkno;
211 generation = inode->i_generation; 205 generation = inode->i_generation;
212 206
213 mlog(0, "Encoding fh: blkno: %llu, generation: %u\n", 207 trace_ocfs2_encode_fh_self((unsigned long long)blkno, generation);
214 (unsigned long long)blkno, generation);
215 208
216 len = 3; 209 len = 3;
217 fh[0] = cpu_to_le32((u32)(blkno >> 32)); 210 fh[0] = cpu_to_le32((u32)(blkno >> 32));
@@ -236,14 +229,14 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
236 len = 6; 229 len = 6;
237 type = 2; 230 type = 2;
238 231
239 mlog(0, "Encoding parent: blkno: %llu, generation: %u\n", 232 trace_ocfs2_encode_fh_parent((unsigned long long)blkno,
240 (unsigned long long)blkno, generation); 233 generation);
241 } 234 }
242 235
243 *max_len = len; 236 *max_len = len;
244 237
245bail: 238bail:
246 mlog_exit(type); 239 trace_ocfs2_encode_fh_type(type);
247 return type; 240 return type;
248} 241}
249 242
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 09e3fdfa6d33..23457b491e8c 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -28,7 +28,6 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/fiemap.h> 29#include <linux/fiemap.h>
30 30
31#define MLOG_MASK_PREFIX ML_EXTENT_MAP
32#include <cluster/masklog.h> 31#include <cluster/masklog.h>
33 32
34#include "ocfs2.h" 33#include "ocfs2.h"
@@ -39,6 +38,7 @@
39#include "inode.h" 38#include "inode.h"
40#include "super.h" 39#include "super.h"
41#include "symlink.h" 40#include "symlink.h"
41#include "ocfs2_trace.h"
42 42
43#include "buffer_head_io.h" 43#include "buffer_head_io.h"
44 44
@@ -841,10 +841,9 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
841 u64 p_block, p_count; 841 u64 p_block, p_count;
842 int i, count, done = 0; 842 int i, count, done = 0;
843 843
844 mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, " 844 trace_ocfs2_read_virt_blocks(
845 "flags = %x, validate = %p)\n", 845 inode, (unsigned long long)v_block, nr, bhs, flags,
846 inode, (unsigned long long)v_block, nr, bhs, flags, 846 validate);
847 validate);
848 847
849 if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >= 848 if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
850 i_size_read(inode)) { 849 i_size_read(inode)) {
@@ -897,7 +896,6 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
897 } 896 }
898 897
899out: 898out:
900 mlog_exit(rc);
901 return rc; 899 return rc;
902} 900}
903 901
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a6651956482e..41565ae52856 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -38,7 +38,6 @@
38#include <linux/quotaops.h> 38#include <linux/quotaops.h>
39#include <linux/blkdev.h> 39#include <linux/blkdev.h>
40 40
41#define MLOG_MASK_PREFIX ML_INODE
42#include <cluster/masklog.h> 41#include <cluster/masklog.h>
43 42
44#include "ocfs2.h" 43#include "ocfs2.h"
@@ -61,6 +60,7 @@
61#include "acl.h" 60#include "acl.h"
62#include "quota.h" 61#include "quota.h"
63#include "refcounttree.h" 62#include "refcounttree.h"
63#include "ocfs2_trace.h"
64 64
65#include "buffer_head_io.h" 65#include "buffer_head_io.h"
66 66
@@ -99,8 +99,10 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
99 int mode = file->f_flags; 99 int mode = file->f_flags;
100 struct ocfs2_inode_info *oi = OCFS2_I(inode); 100 struct ocfs2_inode_info *oi = OCFS2_I(inode);
101 101
102 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, 102 trace_ocfs2_file_open(inode, file, file->f_path.dentry,
103 file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); 103 (unsigned long long)OCFS2_I(inode)->ip_blkno,
104 file->f_path.dentry->d_name.len,
105 file->f_path.dentry->d_name.name, mode);
104 106
105 if (file->f_mode & FMODE_WRITE) 107 if (file->f_mode & FMODE_WRITE)
106 dquot_initialize(inode); 108 dquot_initialize(inode);
@@ -135,7 +137,6 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
135 } 137 }
136 138
137leave: 139leave:
138 mlog_exit(status);
139 return status; 140 return status;
140} 141}
141 142
@@ -143,19 +144,19 @@ static int ocfs2_file_release(struct inode *inode, struct file *file)
143{ 144{
144 struct ocfs2_inode_info *oi = OCFS2_I(inode); 145 struct ocfs2_inode_info *oi = OCFS2_I(inode);
145 146
146 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
147 file->f_path.dentry->d_name.len,
148 file->f_path.dentry->d_name.name);
149
150 spin_lock(&oi->ip_lock); 147 spin_lock(&oi->ip_lock);
151 if (!--oi->ip_open_count) 148 if (!--oi->ip_open_count)
152 oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; 149 oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
150
151 trace_ocfs2_file_release(inode, file, file->f_path.dentry,
152 oi->ip_blkno,
153 file->f_path.dentry->d_name.len,
154 file->f_path.dentry->d_name.name,
155 oi->ip_open_count);
153 spin_unlock(&oi->ip_lock); 156 spin_unlock(&oi->ip_lock);
154 157
155 ocfs2_free_file_private(inode, file); 158 ocfs2_free_file_private(inode, file);
156 159
157 mlog_exit(0);
158
159 return 0; 160 return 0;
160} 161}
161 162
@@ -177,9 +178,11 @@ static int ocfs2_sync_file(struct file *file, int datasync)
177 struct inode *inode = file->f_mapping->host; 178 struct inode *inode = file->f_mapping->host;
178 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 179 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
179 180
180 mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync, 181 trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
181 file->f_path.dentry, file->f_path.dentry->d_name.len, 182 OCFS2_I(inode)->ip_blkno,
182 file->f_path.dentry->d_name.name); 183 file->f_path.dentry->d_name.len,
184 file->f_path.dentry->d_name.name,
185 (unsigned long long)datasync);
183 186
184 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { 187 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
185 /* 188 /*
@@ -195,7 +198,8 @@ static int ocfs2_sync_file(struct file *file, int datasync)
195 err = jbd2_journal_force_commit(journal); 198 err = jbd2_journal_force_commit(journal);
196 199
197bail: 200bail:
198 mlog_exit(err); 201 if (err)
202 mlog_errno(err);
199 203
200 return (err < 0) ? -EIO : 0; 204 return (err < 0) ? -EIO : 0;
201} 205}
@@ -251,8 +255,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
251 handle_t *handle; 255 handle_t *handle;
252 struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data; 256 struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;
253 257
254 mlog_entry_void();
255
256 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 258 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
257 if (IS_ERR(handle)) { 259 if (IS_ERR(handle)) {
258 ret = PTR_ERR(handle); 260 ret = PTR_ERR(handle);
@@ -280,7 +282,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
280out_commit: 282out_commit:
281 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 283 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
282out: 284out:
283 mlog_exit(ret);
284 return ret; 285 return ret;
285} 286}
286 287
@@ -291,7 +292,6 @@ static int ocfs2_set_inode_size(handle_t *handle,
291{ 292{
292 int status; 293 int status;
293 294
294 mlog_entry_void();
295 i_size_write(inode, new_i_size); 295 i_size_write(inode, new_i_size);
296 inode->i_blocks = ocfs2_inode_sector_count(inode); 296 inode->i_blocks = ocfs2_inode_sector_count(inode);
297 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 297 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -303,7 +303,6 @@ static int ocfs2_set_inode_size(handle_t *handle,
303 } 303 }
304 304
305bail: 305bail:
306 mlog_exit(status);
307 return status; 306 return status;
308} 307}
309 308
@@ -375,8 +374,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
375 struct ocfs2_dinode *di; 374 struct ocfs2_dinode *di;
376 u64 cluster_bytes; 375 u64 cluster_bytes;
377 376
378 mlog_entry_void();
379
380 /* 377 /*
381 * We need to CoW the cluster contains the offset if it is reflinked 378 * We need to CoW the cluster contains the offset if it is reflinked
382 * since we will call ocfs2_zero_range_for_truncate later which will 379 * since we will call ocfs2_zero_range_for_truncate later which will
@@ -429,8 +426,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
429out_commit: 426out_commit:
430 ocfs2_commit_trans(osb, handle); 427 ocfs2_commit_trans(osb, handle);
431out: 428out:
432
433 mlog_exit(status);
434 return status; 429 return status;
435} 430}
436 431
@@ -442,14 +437,14 @@ static int ocfs2_truncate_file(struct inode *inode,
442 struct ocfs2_dinode *fe = NULL; 437 struct ocfs2_dinode *fe = NULL;
443 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 438 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
444 439
445 mlog_entry("(inode = %llu, new_i_size = %llu\n",
446 (unsigned long long)OCFS2_I(inode)->ip_blkno,
447 (unsigned long long)new_i_size);
448
449 /* We trust di_bh because it comes from ocfs2_inode_lock(), which 440 /* We trust di_bh because it comes from ocfs2_inode_lock(), which
450 * already validated it */ 441 * already validated it */
451 fe = (struct ocfs2_dinode *) di_bh->b_data; 442 fe = (struct ocfs2_dinode *) di_bh->b_data;
452 443
444 trace_ocfs2_truncate_file((unsigned long long)OCFS2_I(inode)->ip_blkno,
445 (unsigned long long)le64_to_cpu(fe->i_size),
446 (unsigned long long)new_i_size);
447
453 mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), 448 mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
454 "Inode %llu, inode i_size = %lld != di " 449 "Inode %llu, inode i_size = %lld != di "
455 "i_size = %llu, i_flags = 0x%x\n", 450 "i_size = %llu, i_flags = 0x%x\n",
@@ -459,19 +454,14 @@ static int ocfs2_truncate_file(struct inode *inode,
459 le32_to_cpu(fe->i_flags)); 454 le32_to_cpu(fe->i_flags));
460 455
461 if (new_i_size > le64_to_cpu(fe->i_size)) { 456 if (new_i_size > le64_to_cpu(fe->i_size)) {
462 mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n", 457 trace_ocfs2_truncate_file_error(
463 (unsigned long long)le64_to_cpu(fe->i_size), 458 (unsigned long long)le64_to_cpu(fe->i_size),
464 (unsigned long long)new_i_size); 459 (unsigned long long)new_i_size);
465 status = -EINVAL; 460 status = -EINVAL;
466 mlog_errno(status); 461 mlog_errno(status);
467 goto bail; 462 goto bail;
468 } 463 }
469 464
470 mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
471 (unsigned long long)le64_to_cpu(fe->i_blkno),
472 (unsigned long long)le64_to_cpu(fe->i_size),
473 (unsigned long long)new_i_size);
474
475 /* lets handle the simple truncate cases before doing any more 465 /* lets handle the simple truncate cases before doing any more
476 * cluster locking. */ 466 * cluster locking. */
477 if (new_i_size == le64_to_cpu(fe->i_size)) 467 if (new_i_size == le64_to_cpu(fe->i_size))
@@ -525,7 +515,6 @@ bail:
525 if (!status && OCFS2_I(inode)->ip_clusters == 0) 515 if (!status && OCFS2_I(inode)->ip_clusters == 0)
526 status = ocfs2_try_remove_refcount_tree(inode, di_bh); 516 status = ocfs2_try_remove_refcount_tree(inode, di_bh);
527 517
528 mlog_exit(status);
529 return status; 518 return status;
530} 519}
531 520
@@ -578,8 +567,6 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
578 struct ocfs2_extent_tree et; 567 struct ocfs2_extent_tree et;
579 int did_quota = 0; 568 int did_quota = 0;
580 569
581 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
582
583 /* 570 /*
584 * This function only exists for file systems which don't 571 * This function only exists for file systems which don't
585 * support holes. 572 * support holes.
@@ -596,11 +583,6 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
596restart_all: 583restart_all:
597 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); 584 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
598 585
599 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
600 "clusters_to_add = %u\n",
601 (unsigned long long)OCFS2_I(inode)->ip_blkno,
602 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
603 clusters_to_add);
604 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh); 586 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
605 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, 587 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
606 &data_ac, &meta_ac); 588 &data_ac, &meta_ac);
@@ -620,6 +602,12 @@ restart_all:
620 } 602 }
621 603
622restarted_transaction: 604restarted_transaction:
605 trace_ocfs2_extend_allocation(
606 (unsigned long long)OCFS2_I(inode)->ip_blkno,
607 (unsigned long long)i_size_read(inode),
608 le32_to_cpu(fe->i_clusters), clusters_to_add,
609 why, restart_func);
610
623 status = dquot_alloc_space_nodirty(inode, 611 status = dquot_alloc_space_nodirty(inode,
624 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); 612 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
625 if (status) 613 if (status)
@@ -666,13 +654,11 @@ restarted_transaction:
666 654
667 if (why != RESTART_NONE && clusters_to_add) { 655 if (why != RESTART_NONE && clusters_to_add) {
668 if (why == RESTART_META) { 656 if (why == RESTART_META) {
669 mlog(0, "restarting function.\n");
670 restart_func = 1; 657 restart_func = 1;
671 status = 0; 658 status = 0;
672 } else { 659 } else {
673 BUG_ON(why != RESTART_TRANS); 660 BUG_ON(why != RESTART_TRANS);
674 661
675 mlog(0, "restarting transaction.\n");
676 /* TODO: This can be more intelligent. */ 662 /* TODO: This can be more intelligent. */
677 credits = ocfs2_calc_extend_credits(osb->sb, 663 credits = ocfs2_calc_extend_credits(osb->sb,
678 &fe->id2.i_list, 664 &fe->id2.i_list,
@@ -689,11 +675,11 @@ restarted_transaction:
689 } 675 }
690 } 676 }
691 677
692 mlog(0, "fe: i_clusters = %u, i_size=%llu\n", 678 trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno,
693 le32_to_cpu(fe->i_clusters), 679 le32_to_cpu(fe->i_clusters),
694 (unsigned long long)le64_to_cpu(fe->i_size)); 680 (unsigned long long)le64_to_cpu(fe->i_size),
695 mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", 681 OCFS2_I(inode)->ip_clusters,
696 OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); 682 (unsigned long long)i_size_read(inode));
697 683
698leave: 684leave:
699 if (status < 0 && did_quota) 685 if (status < 0 && did_quota)
@@ -718,7 +704,6 @@ leave:
718 brelse(bh); 704 brelse(bh);
719 bh = NULL; 705 bh = NULL;
720 706
721 mlog_exit(status);
722 return status; 707 return status;
723} 708}
724 709
@@ -785,10 +770,11 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
785 if (!zero_to) 770 if (!zero_to)
786 zero_to = PAGE_CACHE_SIZE; 771 zero_to = PAGE_CACHE_SIZE;
787 772
788 mlog(0, 773 trace_ocfs2_write_zero_page(
789 "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n", 774 (unsigned long long)OCFS2_I(inode)->ip_blkno,
790 (unsigned long long)abs_from, (unsigned long long)abs_to, 775 (unsigned long long)abs_from,
791 index, zero_from, zero_to); 776 (unsigned long long)abs_to,
777 index, zero_from, zero_to);
792 778
793 /* We know that zero_from is block aligned */ 779 /* We know that zero_from is block aligned */
794 for (block_start = zero_from; block_start < zero_to; 780 for (block_start = zero_from; block_start < zero_to;
@@ -928,9 +914,10 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
928 u64 next_pos; 914 u64 next_pos;
929 u64 zero_pos = range_start; 915 u64 zero_pos = range_start;
930 916
931 mlog(0, "range_start = %llu, range_end = %llu\n", 917 trace_ocfs2_zero_extend_range(
932 (unsigned long long)range_start, 918 (unsigned long long)OCFS2_I(inode)->ip_blkno,
933 (unsigned long long)range_end); 919 (unsigned long long)range_start,
920 (unsigned long long)range_end);
934 BUG_ON(range_start >= range_end); 921 BUG_ON(range_start >= range_end);
935 922
936 while (zero_pos < range_end) { 923 while (zero_pos < range_end) {
@@ -962,9 +949,9 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
962 struct super_block *sb = inode->i_sb; 949 struct super_block *sb = inode->i_sb;
963 950
964 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); 951 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
965 mlog(0, "zero_start %llu for i_size %llu\n", 952 trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno,
966 (unsigned long long)zero_start, 953 (unsigned long long)zero_start,
967 (unsigned long long)i_size_read(inode)); 954 (unsigned long long)i_size_read(inode));
968 while (zero_start < zero_to_size) { 955 while (zero_start < zero_to_size) {
969 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, 956 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
970 zero_to_size, 957 zero_to_size,
@@ -1113,30 +1100,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1113 struct dquot *transfer_to[MAXQUOTAS] = { }; 1100 struct dquot *transfer_to[MAXQUOTAS] = { };
1114 int qtype; 1101 int qtype;
1115 1102
1116 mlog_entry("(0x%p, '%.*s')\n", dentry, 1103 trace_ocfs2_setattr(inode, dentry,
1117 dentry->d_name.len, dentry->d_name.name); 1104 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1105 dentry->d_name.len, dentry->d_name.name,
1106 attr->ia_valid, attr->ia_mode,
1107 attr->ia_uid, attr->ia_gid);
1118 1108
1119 /* ensuring we don't even attempt to truncate a symlink */ 1109 /* ensuring we don't even attempt to truncate a symlink */
1120 if (S_ISLNK(inode->i_mode)) 1110 if (S_ISLNK(inode->i_mode))
1121 attr->ia_valid &= ~ATTR_SIZE; 1111 attr->ia_valid &= ~ATTR_SIZE;
1122 1112
1123 if (attr->ia_valid & ATTR_MODE)
1124 mlog(0, "mode change: %d\n", attr->ia_mode);
1125 if (attr->ia_valid & ATTR_UID)
1126 mlog(0, "uid change: %d\n", attr->ia_uid);
1127 if (attr->ia_valid & ATTR_GID)
1128 mlog(0, "gid change: %d\n", attr->ia_gid);
1129 if (attr->ia_valid & ATTR_SIZE)
1130 mlog(0, "size change...\n");
1131 if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
1132 mlog(0, "time change...\n");
1133
1134#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ 1113#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
1135 | ATTR_GID | ATTR_UID | ATTR_MODE) 1114 | ATTR_GID | ATTR_UID | ATTR_MODE)
1136 if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { 1115 if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
1137 mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
1138 return 0; 1116 return 0;
1139 }
1140 1117
1141 status = inode_change_ok(inode, attr); 1118 status = inode_change_ok(inode, attr);
1142 if (status) 1119 if (status)
@@ -1274,7 +1251,6 @@ bail:
1274 mlog_errno(status); 1251 mlog_errno(status);
1275 } 1252 }
1276 1253
1277 mlog_exit(status);
1278 return status; 1254 return status;
1279} 1255}
1280 1256
@@ -1287,8 +1263,6 @@ int ocfs2_getattr(struct vfsmount *mnt,
1287 struct ocfs2_super *osb = sb->s_fs_info; 1263 struct ocfs2_super *osb = sb->s_fs_info;
1288 int err; 1264 int err;
1289 1265
1290 mlog_entry_void();
1291
1292 err = ocfs2_inode_revalidate(dentry); 1266 err = ocfs2_inode_revalidate(dentry);
1293 if (err) { 1267 if (err) {
1294 if (err != -ENOENT) 1268 if (err != -ENOENT)
@@ -1302,8 +1276,6 @@ int ocfs2_getattr(struct vfsmount *mnt,
1302 stat->blksize = osb->s_clustersize; 1276 stat->blksize = osb->s_clustersize;
1303 1277
1304bail: 1278bail:
1305 mlog_exit(err);
1306
1307 return err; 1279 return err;
1308} 1280}
1309 1281
@@ -1314,8 +1286,6 @@ int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
1314 if (flags & IPERM_FLAG_RCU) 1286 if (flags & IPERM_FLAG_RCU)
1315 return -ECHILD; 1287 return -ECHILD;
1316 1288
1317 mlog_entry_void();
1318
1319 ret = ocfs2_inode_lock(inode, NULL, 0); 1289 ret = ocfs2_inode_lock(inode, NULL, 0);
1320 if (ret) { 1290 if (ret) {
1321 if (ret != -ENOENT) 1291 if (ret != -ENOENT)
@@ -1327,7 +1297,6 @@ int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
1327 1297
1328 ocfs2_inode_unlock(inode, 0); 1298 ocfs2_inode_unlock(inode, 0);
1329out: 1299out:
1330 mlog_exit(ret);
1331 return ret; 1300 return ret;
1332} 1301}
1333 1302
@@ -1339,8 +1308,9 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1339 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1308 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1340 struct ocfs2_dinode *di; 1309 struct ocfs2_dinode *di;
1341 1310
1342 mlog_entry("(Inode %llu, mode 0%o)\n", 1311 trace_ocfs2_write_remove_suid(
1343 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); 1312 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1313 inode->i_mode);
1344 1314
1345 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 1315 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1346 if (IS_ERR(handle)) { 1316 if (IS_ERR(handle)) {
@@ -1368,7 +1338,6 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1368out_trans: 1338out_trans:
1369 ocfs2_commit_trans(osb, handle); 1339 ocfs2_commit_trans(osb, handle);
1370out: 1340out:
1371 mlog_exit(ret);
1372 return ret; 1341 return ret;
1373} 1342}
1374 1343
@@ -1547,8 +1516,9 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1547 * partial clusters here. There's no need to worry about 1516 * partial clusters here. There's no need to worry about
1548 * physical allocation - the zeroing code knows to skip holes. 1517 * physical allocation - the zeroing code knows to skip holes.
1549 */ 1518 */
1550 mlog(0, "byte start: %llu, end: %llu\n", 1519 trace_ocfs2_zero_partial_clusters(
1551 (unsigned long long)start, (unsigned long long)end); 1520 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1521 (unsigned long long)start, (unsigned long long)end);
1552 1522
1553 /* 1523 /*
1554 * If both edges are on a cluster boundary then there's no 1524 * If both edges are on a cluster boundary then there's no
@@ -1572,8 +1542,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1572 if (tmpend > end) 1542 if (tmpend > end)
1573 tmpend = end; 1543 tmpend = end;
1574 1544
1575 mlog(0, "1st range: start: %llu, tmpend: %llu\n", 1545 trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
1576 (unsigned long long)start, (unsigned long long)tmpend); 1546 (unsigned long long)tmpend);
1577 1547
1578 ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); 1548 ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
1579 if (ret) 1549 if (ret)
@@ -1587,8 +1557,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1587 */ 1557 */
1588 start = end & ~(osb->s_clustersize - 1); 1558 start = end & ~(osb->s_clustersize - 1);
1589 1559
1590 mlog(0, "2nd range: start: %llu, end: %llu\n", 1560 trace_ocfs2_zero_partial_clusters_range2(
1591 (unsigned long long)start, (unsigned long long)end); 1561 (unsigned long long)start, (unsigned long long)end);
1592 1562
1593 ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); 1563 ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
1594 if (ret) 1564 if (ret)
@@ -1688,6 +1658,11 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1688 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 1658 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1689 ocfs2_init_dealloc_ctxt(&dealloc); 1659 ocfs2_init_dealloc_ctxt(&dealloc);
1690 1660
1661 trace_ocfs2_remove_inode_range(
1662 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1663 (unsigned long long)byte_start,
1664 (unsigned long long)byte_len);
1665
1691 if (byte_len == 0) 1666 if (byte_len == 0)
1692 return 0; 1667 return 0;
1693 1668
@@ -1734,11 +1709,6 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1734 trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits; 1709 trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
1735 cluster_in_el = trunc_end; 1710 cluster_in_el = trunc_end;
1736 1711
1737 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
1738 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1739 (unsigned long long)byte_start,
1740 (unsigned long long)byte_len, trunc_start, trunc_end);
1741
1742 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); 1712 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
1743 if (ret) { 1713 if (ret) {
1744 mlog_errno(ret); 1714 mlog_errno(ret);
@@ -2093,7 +2063,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2093 int ret = 0, meta_level = 0; 2063 int ret = 0, meta_level = 0;
2094 struct dentry *dentry = file->f_path.dentry; 2064 struct dentry *dentry = file->f_path.dentry;
2095 struct inode *inode = dentry->d_inode; 2065 struct inode *inode = dentry->d_inode;
2096 loff_t saved_pos, end; 2066 loff_t saved_pos = 0, end;
2097 2067
2098 /* 2068 /*
2099 * We start with a read level meta lock and only jump to an ex 2069 * We start with a read level meta lock and only jump to an ex
@@ -2132,12 +2102,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2132 2102
2133 /* work on a copy of ppos until we're sure that we won't have 2103 /* work on a copy of ppos until we're sure that we won't have
2134 * to recalculate it due to relocking. */ 2104 * to recalculate it due to relocking. */
2135 if (appending) { 2105 if (appending)
2136 saved_pos = i_size_read(inode); 2106 saved_pos = i_size_read(inode);
2137 mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); 2107 else
2138 } else {
2139 saved_pos = *ppos; 2108 saved_pos = *ppos;
2140 }
2141 2109
2142 end = saved_pos + count; 2110 end = saved_pos + count;
2143 2111
@@ -2208,6 +2176,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2208 *ppos = saved_pos; 2176 *ppos = saved_pos;
2209 2177
2210out_unlock: 2178out_unlock:
2179 trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
2180 saved_pos, appending, count,
2181 direct_io, has_refcount);
2182
2211 if (meta_level >= 0) 2183 if (meta_level >= 0)
2212 ocfs2_inode_unlock(inode, meta_level); 2184 ocfs2_inode_unlock(inode, meta_level);
2213 2185
@@ -2233,10 +2205,11 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2233 int full_coherency = !(osb->s_mount_opt & 2205 int full_coherency = !(osb->s_mount_opt &
2234 OCFS2_MOUNT_COHERENCY_BUFFERED); 2206 OCFS2_MOUNT_COHERENCY_BUFFERED);
2235 2207
2236 mlog_entry("(0x%p, %u, '%.*s')\n", file, 2208 trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
2237 (unsigned int)nr_segs, 2209 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2238 file->f_path.dentry->d_name.len, 2210 file->f_path.dentry->d_name.len,
2239 file->f_path.dentry->d_name.name); 2211 file->f_path.dentry->d_name.name,
2212 (unsigned int)nr_segs);
2240 2213
2241 if (iocb->ki_left == 0) 2214 if (iocb->ki_left == 0)
2242 return 0; 2215 return 0;
@@ -2402,7 +2375,6 @@ out_sems:
2402 2375
2403 if (written) 2376 if (written)
2404 ret = written; 2377 ret = written;
2405 mlog_exit(ret);
2406 return ret; 2378 return ret;
2407} 2379}
2408 2380
@@ -2438,10 +2410,11 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2438 .u.file = out, 2410 .u.file = out,
2439 }; 2411 };
2440 2412
2441 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, 2413
2442 (unsigned int)len, 2414 trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry,
2443 out->f_path.dentry->d_name.len, 2415 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2444 out->f_path.dentry->d_name.name); 2416 out->f_path.dentry->d_name.len,
2417 out->f_path.dentry->d_name.name, len);
2445 2418
2446 if (pipe->inode) 2419 if (pipe->inode)
2447 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); 2420 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
@@ -2485,7 +2458,6 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2485 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 2458 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
2486 } 2459 }
2487 2460
2488 mlog_exit(ret);
2489 return ret; 2461 return ret;
2490} 2462}
2491 2463
@@ -2498,10 +2470,10 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2498 int ret = 0, lock_level = 0; 2470 int ret = 0, lock_level = 0;
2499 struct inode *inode = in->f_path.dentry->d_inode; 2471 struct inode *inode = in->f_path.dentry->d_inode;
2500 2472
2501 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, 2473 trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
2502 (unsigned int)len, 2474 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2503 in->f_path.dentry->d_name.len, 2475 in->f_path.dentry->d_name.len,
2504 in->f_path.dentry->d_name.name); 2476 in->f_path.dentry->d_name.name, len);
2505 2477
2506 /* 2478 /*
2507 * See the comment in ocfs2_file_aio_read() 2479 * See the comment in ocfs2_file_aio_read()
@@ -2516,7 +2488,6 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2516 ret = generic_file_splice_read(in, ppos, pipe, len, flags); 2488 ret = generic_file_splice_read(in, ppos, pipe, len, flags);
2517 2489
2518bail: 2490bail:
2519 mlog_exit(ret);
2520 return ret; 2491 return ret;
2521} 2492}
2522 2493
@@ -2529,10 +2500,11 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2529 struct file *filp = iocb->ki_filp; 2500 struct file *filp = iocb->ki_filp;
2530 struct inode *inode = filp->f_path.dentry->d_inode; 2501 struct inode *inode = filp->f_path.dentry->d_inode;
2531 2502
2532 mlog_entry("(0x%p, %u, '%.*s')\n", filp, 2503 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
2533 (unsigned int)nr_segs, 2504 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2534 filp->f_path.dentry->d_name.len, 2505 filp->f_path.dentry->d_name.len,
2535 filp->f_path.dentry->d_name.name); 2506 filp->f_path.dentry->d_name.name, nr_segs);
2507
2536 2508
2537 if (!inode) { 2509 if (!inode) {
2538 ret = -EINVAL; 2510 ret = -EINVAL;
@@ -2578,8 +2550,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2578 ocfs2_inode_unlock(inode, lock_level); 2550 ocfs2_inode_unlock(inode, lock_level);
2579 2551
2580 ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); 2552 ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
2581 if (ret == -EINVAL) 2553 trace_generic_file_aio_read_ret(ret);
2582 mlog(0, "generic_file_aio_read returned -EINVAL\n");
2583 2554
2584 /* buffered aio wouldn't have proper lock coverage today */ 2555 /* buffered aio wouldn't have proper lock coverage today */
2585 BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); 2556 BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
@@ -2597,7 +2568,6 @@ bail:
2597 } 2568 }
2598 if (rw_level != -1) 2569 if (rw_level != -1)
2599 ocfs2_rw_unlock(inode, rw_level); 2570 ocfs2_rw_unlock(inode, rw_level);
2600 mlog_exit(ret);
2601 2571
2602 return ret; 2572 return ret;
2603} 2573}
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index 1aa863dd901f..d8208b20dc53 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -28,7 +28,6 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30 30
31#define MLOG_MASK_PREFIX ML_SUPER
32#include <cluster/masklog.h> 31#include <cluster/masklog.h>
33 32
34#include "ocfs2.h" 33#include "ocfs2.h"
@@ -37,6 +36,7 @@
37#include "heartbeat.h" 36#include "heartbeat.h"
38#include "inode.h" 37#include "inode.h"
39#include "journal.h" 38#include "journal.h"
39#include "ocfs2_trace.h"
40 40
41#include "buffer_head_io.h" 41#include "buffer_head_io.h"
42 42
@@ -66,7 +66,7 @@ void ocfs2_do_node_down(int node_num, void *data)
66 66
67 BUG_ON(osb->node_num == node_num); 67 BUG_ON(osb->node_num == node_num);
68 68
69 mlog(0, "ocfs2: node down event for %d\n", node_num); 69 trace_ocfs2_do_node_down(node_num);
70 70
71 if (!osb->cconn) { 71 if (!osb->cconn) {
72 /* 72 /*
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4068c6c4c6f6..177d3a6c2a5f 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -31,7 +31,6 @@
31 31
32#include <asm/byteorder.h> 32#include <asm/byteorder.h>
33 33
34#define MLOG_MASK_PREFIX ML_INODE
35#include <cluster/masklog.h> 34#include <cluster/masklog.h>
36 35
37#include "ocfs2.h" 36#include "ocfs2.h"
@@ -53,6 +52,7 @@
53#include "uptodate.h" 52#include "uptodate.h"
54#include "xattr.h" 53#include "xattr.h"
55#include "refcounttree.h" 54#include "refcounttree.h"
55#include "ocfs2_trace.h"
56 56
57#include "buffer_head_io.h" 57#include "buffer_head_io.h"
58 58
@@ -131,7 +131,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
131 struct super_block *sb = osb->sb; 131 struct super_block *sb = osb->sb;
132 struct ocfs2_find_inode_args args; 132 struct ocfs2_find_inode_args args;
133 133
134 mlog_entry("(blkno = %llu)\n", (unsigned long long)blkno); 134 trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
135 sysfile_type);
135 136
136 /* Ok. By now we've either got the offsets passed to us by the 137 /* Ok. By now we've either got the offsets passed to us by the
137 * caller, or we just pulled them off the bh. Lets do some 138 * caller, or we just pulled them off the bh. Lets do some
@@ -152,16 +153,16 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
152 /* inode was *not* in the inode cache. 2.6.x requires 153 /* inode was *not* in the inode cache. 2.6.x requires
153 * us to do our own read_inode call and unlock it 154 * us to do our own read_inode call and unlock it
154 * afterwards. */ 155 * afterwards. */
155 if (inode && inode->i_state & I_NEW) {
156 mlog(0, "Inode was not in inode cache, reading it.\n");
157 ocfs2_read_locked_inode(inode, &args);
158 unlock_new_inode(inode);
159 }
160 if (inode == NULL) { 156 if (inode == NULL) {
161 inode = ERR_PTR(-ENOMEM); 157 inode = ERR_PTR(-ENOMEM);
162 mlog_errno(PTR_ERR(inode)); 158 mlog_errno(PTR_ERR(inode));
163 goto bail; 159 goto bail;
164 } 160 }
161 trace_ocfs2_iget5_locked(inode->i_state);
162 if (inode->i_state & I_NEW) {
163 ocfs2_read_locked_inode(inode, &args);
164 unlock_new_inode(inode);
165 }
165 if (is_bad_inode(inode)) { 166 if (is_bad_inode(inode)) {
166 iput(inode); 167 iput(inode);
167 inode = ERR_PTR(-ESTALE); 168 inode = ERR_PTR(-ESTALE);
@@ -170,9 +171,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
170 171
171bail: 172bail:
172 if (!IS_ERR(inode)) { 173 if (!IS_ERR(inode)) {
173 mlog(0, "returning inode with number %llu\n", 174 trace_ocfs2_iget_end(inode,
174 (unsigned long long)OCFS2_I(inode)->ip_blkno); 175 (unsigned long long)OCFS2_I(inode)->ip_blkno);
175 mlog_exit_ptr(inode);
176 } 176 }
177 177
178 return inode; 178 return inode;
@@ -192,18 +192,17 @@ static int ocfs2_find_actor(struct inode *inode, void *opaque)
192 struct ocfs2_inode_info *oi = OCFS2_I(inode); 192 struct ocfs2_inode_info *oi = OCFS2_I(inode);
193 int ret = 0; 193 int ret = 0;
194 194
195 mlog_entry("(0x%p, %lu, 0x%p)\n", inode, inode->i_ino, opaque);
196
197 args = opaque; 195 args = opaque;
198 196
199 mlog_bug_on_msg(!inode, "No inode in find actor!\n"); 197 mlog_bug_on_msg(!inode, "No inode in find actor!\n");
200 198
199 trace_ocfs2_find_actor(inode, inode->i_ino, opaque, args->fi_blkno);
200
201 if (oi->ip_blkno != args->fi_blkno) 201 if (oi->ip_blkno != args->fi_blkno)
202 goto bail; 202 goto bail;
203 203
204 ret = 1; 204 ret = 1;
205bail: 205bail:
206 mlog_exit(ret);
207 return ret; 206 return ret;
208} 207}
209 208
@@ -218,8 +217,6 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
218 static struct lock_class_key ocfs2_quota_ip_alloc_sem_key, 217 static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
219 ocfs2_file_ip_alloc_sem_key; 218 ocfs2_file_ip_alloc_sem_key;
220 219
221 mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
222
223 inode->i_ino = args->fi_ino; 220 inode->i_ino = args->fi_ino;
224 OCFS2_I(inode)->ip_blkno = args->fi_blkno; 221 OCFS2_I(inode)->ip_blkno = args->fi_blkno;
225 if (args->fi_sysfile_type != 0) 222 if (args->fi_sysfile_type != 0)
@@ -235,7 +232,6 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
235 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, 232 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
236 &ocfs2_file_ip_alloc_sem_key); 233 &ocfs2_file_ip_alloc_sem_key);
237 234
238 mlog_exit(0);
239 return 0; 235 return 0;
240} 236}
241 237
@@ -246,9 +242,6 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
246 struct ocfs2_super *osb; 242 struct ocfs2_super *osb;
247 int use_plocks = 1; 243 int use_plocks = 1;
248 244
249 mlog_entry("(0x%p, size:%llu)\n", inode,
250 (unsigned long long)le64_to_cpu(fe->i_size));
251
252 sb = inode->i_sb; 245 sb = inode->i_sb;
253 osb = OCFS2_SB(sb); 246 osb = OCFS2_SB(sb);
254 247
@@ -300,20 +293,20 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
300 293
301 inode->i_nlink = ocfs2_read_links_count(fe); 294 inode->i_nlink = ocfs2_read_links_count(fe);
302 295
296 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno,
297 le32_to_cpu(fe->i_flags));
303 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { 298 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
304 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; 299 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
305 inode->i_flags |= S_NOQUOTA; 300 inode->i_flags |= S_NOQUOTA;
306 } 301 }
307 302
308 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 303 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
309 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 304 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
310 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
311 } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) { 305 } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
312 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 306 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
313 } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) { 307 } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
314 inode->i_flags |= S_NOQUOTA; 308 inode->i_flags |= S_NOQUOTA;
315 } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) { 309 } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
316 mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
317 /* we can't actually hit this as read_inode can't 310 /* we can't actually hit this as read_inode can't
318 * handle superblocks today ;-) */ 311 * handle superblocks today ;-) */
319 BUG(); 312 BUG();
@@ -381,7 +374,6 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
381 if (S_ISDIR(inode->i_mode)) 374 if (S_ISDIR(inode->i_mode))
382 ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv, 375 ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
383 OCFS2_RESV_FLAG_DIR); 376 OCFS2_RESV_FLAG_DIR);
384 mlog_exit_void();
385} 377}
386 378
387static int ocfs2_read_locked_inode(struct inode *inode, 379static int ocfs2_read_locked_inode(struct inode *inode,
@@ -394,8 +386,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
394 int status, can_lock; 386 int status, can_lock;
395 u32 generation = 0; 387 u32 generation = 0;
396 388
397 mlog_entry("(0x%p, 0x%p)\n", inode, args);
398
399 status = -EINVAL; 389 status = -EINVAL;
400 if (inode == NULL || inode->i_sb == NULL) { 390 if (inode == NULL || inode->i_sb == NULL) {
401 mlog(ML_ERROR, "bad inode\n"); 391 mlog(ML_ERROR, "bad inode\n");
@@ -443,6 +433,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
443 && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) 433 && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
444 && !ocfs2_mount_local(osb); 434 && !ocfs2_mount_local(osb);
445 435
436 trace_ocfs2_read_locked_inode(
437 (unsigned long long)OCFS2_I(inode)->ip_blkno, can_lock);
438
446 /* 439 /*
447 * To maintain backwards compatibility with older versions of 440 * To maintain backwards compatibility with older versions of
448 * ocfs2-tools, we still store the generation value for system 441 * ocfs2-tools, we still store the generation value for system
@@ -534,7 +527,6 @@ bail:
534 if (args && bh) 527 if (args && bh)
535 brelse(bh); 528 brelse(bh);
536 529
537 mlog_exit(status);
538 return status; 530 return status;
539} 531}
540 532
@@ -551,8 +543,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
551 struct ocfs2_dinode *fe; 543 struct ocfs2_dinode *fe;
552 handle_t *handle = NULL; 544 handle_t *handle = NULL;
553 545
554 mlog_entry_void();
555
556 fe = (struct ocfs2_dinode *) fe_bh->b_data; 546 fe = (struct ocfs2_dinode *) fe_bh->b_data;
557 547
558 /* 548 /*
@@ -600,7 +590,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
600out: 590out:
601 if (handle) 591 if (handle)
602 ocfs2_commit_trans(osb, handle); 592 ocfs2_commit_trans(osb, handle);
603 mlog_exit(status);
604 return status; 593 return status;
605} 594}
606 595
@@ -696,8 +685,6 @@ static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb,
696 685
697 spin_lock(&osb->osb_lock); 686 spin_lock(&osb->osb_lock);
698 if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) { 687 if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) {
699 mlog(0, "Recovery is happening on orphan dir %d, will skip "
700 "this inode\n", slot);
701 ret = -EDEADLK; 688 ret = -EDEADLK;
702 goto out; 689 goto out;
703 } 690 }
@@ -706,6 +693,7 @@ static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb,
706 osb->osb_orphan_wipes[slot]++; 693 osb->osb_orphan_wipes[slot]++;
707out: 694out:
708 spin_unlock(&osb->osb_lock); 695 spin_unlock(&osb->osb_lock);
696 trace_ocfs2_check_orphan_recovery_state(slot, ret);
709 return ret; 697 return ret;
710} 698}
711 699
@@ -816,6 +804,10 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
816 struct ocfs2_inode_info *oi = OCFS2_I(inode); 804 struct ocfs2_inode_info *oi = OCFS2_I(inode);
817 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 805 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
818 806
807 trace_ocfs2_inode_is_valid_to_delete(current, osb->dc_task,
808 (unsigned long long)oi->ip_blkno,
809 oi->ip_flags);
810
819 /* We shouldn't be getting here for the root directory 811 /* We shouldn't be getting here for the root directory
820 * inode.. */ 812 * inode.. */
821 if (inode == osb->root_inode) { 813 if (inode == osb->root_inode) {
@@ -828,11 +820,8 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
828 * have to skip deleting this guy. That's OK though because 820 * have to skip deleting this guy. That's OK though because
829 * the node who's doing the actual deleting should handle it 821 * the node who's doing the actual deleting should handle it
830 * anyway. */ 822 * anyway. */
831 if (current == osb->dc_task) { 823 if (current == osb->dc_task)
832 mlog(0, "Skipping delete of %lu because we're currently "
833 "in downconvert\n", inode->i_ino);
834 goto bail; 824 goto bail;
835 }
836 825
837 spin_lock(&oi->ip_lock); 826 spin_lock(&oi->ip_lock);
838 /* OCFS2 *never* deletes system files. This should technically 827 /* OCFS2 *never* deletes system files. This should technically
@@ -847,11 +836,8 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
847 /* If we have allowd wipe of this inode for another node, it 836 /* If we have allowd wipe of this inode for another node, it
848 * will be marked here so we can safely skip it. Recovery will 837 * will be marked here so we can safely skip it. Recovery will
849 * cleanup any inodes we might inadvertantly skip here. */ 838 * cleanup any inodes we might inadvertantly skip here. */
850 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { 839 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE)
851 mlog(0, "Skipping delete of %lu because another node "
852 "has done this for us.\n", inode->i_ino);
853 goto bail_unlock; 840 goto bail_unlock;
854 }
855 841
856 ret = 1; 842 ret = 1;
857bail_unlock: 843bail_unlock:
@@ -868,28 +854,27 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
868 struct buffer_head *di_bh, 854 struct buffer_head *di_bh,
869 int *wipe) 855 int *wipe)
870{ 856{
871 int status = 0; 857 int status = 0, reason = 0;
872 struct ocfs2_inode_info *oi = OCFS2_I(inode); 858 struct ocfs2_inode_info *oi = OCFS2_I(inode);
873 struct ocfs2_dinode *di; 859 struct ocfs2_dinode *di;
874 860
875 *wipe = 0; 861 *wipe = 0;
876 862
863 trace_ocfs2_query_inode_wipe_begin((unsigned long long)oi->ip_blkno,
864 inode->i_nlink);
865
877 /* While we were waiting for the cluster lock in 866 /* While we were waiting for the cluster lock in
878 * ocfs2_delete_inode, another node might have asked to delete 867 * ocfs2_delete_inode, another node might have asked to delete
879 * the inode. Recheck our flags to catch this. */ 868 * the inode. Recheck our flags to catch this. */
880 if (!ocfs2_inode_is_valid_to_delete(inode)) { 869 if (!ocfs2_inode_is_valid_to_delete(inode)) {
881 mlog(0, "Skipping delete of %llu because flags changed\n", 870 reason = 1;
882 (unsigned long long)oi->ip_blkno);
883 goto bail; 871 goto bail;
884 } 872 }
885 873
886 /* Now that we have an up to date inode, we can double check 874 /* Now that we have an up to date inode, we can double check
887 * the link count. */ 875 * the link count. */
888 if (inode->i_nlink) { 876 if (inode->i_nlink)
889 mlog(0, "Skipping delete of %llu because nlink = %u\n",
890 (unsigned long long)oi->ip_blkno, inode->i_nlink);
891 goto bail; 877 goto bail;
892 }
893 878
894 /* Do some basic inode verification... */ 879 /* Do some basic inode verification... */
895 di = (struct ocfs2_dinode *) di_bh->b_data; 880 di = (struct ocfs2_dinode *) di_bh->b_data;
@@ -904,9 +889,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
904 * ORPHANED_FL not. 889 * ORPHANED_FL not.
905 */ 890 */
906 if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { 891 if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
907 mlog(0, "Reflinked inode %llu is no longer orphaned. " 892 reason = 2;
908 "it shouldn't be deleted\n",
909 (unsigned long long)oi->ip_blkno);
910 goto bail; 893 goto bail;
911 } 894 }
912 895
@@ -943,8 +926,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
943 status = ocfs2_try_open_lock(inode, 1); 926 status = ocfs2_try_open_lock(inode, 1);
944 if (status == -EAGAIN) { 927 if (status == -EAGAIN) {
945 status = 0; 928 status = 0;
946 mlog(0, "Skipping delete of %llu because it is in use on " 929 reason = 3;
947 "other nodes\n", (unsigned long long)oi->ip_blkno);
948 goto bail; 930 goto bail;
949 } 931 }
950 if (status < 0) { 932 if (status < 0) {
@@ -953,11 +935,10 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
953 } 935 }
954 936
955 *wipe = 1; 937 *wipe = 1;
956 mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n", 938 trace_ocfs2_query_inode_wipe_succ(le16_to_cpu(di->i_orphaned_slot));
957 (unsigned long long)oi->ip_blkno,
958 le16_to_cpu(di->i_orphaned_slot));
959 939
960bail: 940bail:
941 trace_ocfs2_query_inode_wipe_end(status, reason);
961 return status; 942 return status;
962} 943}
963 944
@@ -967,8 +948,8 @@ bail:
967static void ocfs2_cleanup_delete_inode(struct inode *inode, 948static void ocfs2_cleanup_delete_inode(struct inode *inode,
968 int sync_data) 949 int sync_data)
969{ 950{
970 mlog(0, "Cleanup inode %llu, sync = %d\n", 951 trace_ocfs2_cleanup_delete_inode(
971 (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); 952 (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
972 if (sync_data) 953 if (sync_data)
973 write_inode_now(inode, 1); 954 write_inode_now(inode, 1);
974 truncate_inode_pages(&inode->i_data, 0); 955 truncate_inode_pages(&inode->i_data, 0);
@@ -980,15 +961,15 @@ static void ocfs2_delete_inode(struct inode *inode)
980 sigset_t oldset; 961 sigset_t oldset;
981 struct buffer_head *di_bh = NULL; 962 struct buffer_head *di_bh = NULL;
982 963
983 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 964 trace_ocfs2_delete_inode(inode->i_ino,
965 (unsigned long long)OCFS2_I(inode)->ip_blkno,
966 is_bad_inode(inode));
984 967
985 /* When we fail in read_inode() we mark inode as bad. The second test 968 /* When we fail in read_inode() we mark inode as bad. The second test
986 * catches the case when inode allocation fails before allocating 969 * catches the case when inode allocation fails before allocating
987 * a block for inode. */ 970 * a block for inode. */
988 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) { 971 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno)
989 mlog(0, "Skipping delete of bad inode\n");
990 goto bail; 972 goto bail;
991 }
992 973
993 dquot_initialize(inode); 974 dquot_initialize(inode);
994 975
@@ -1080,7 +1061,7 @@ bail_unlock_nfs_sync:
1080bail_unblock: 1061bail_unblock:
1081 ocfs2_unblock_signals(&oldset); 1062 ocfs2_unblock_signals(&oldset);
1082bail: 1063bail:
1083 mlog_exit_void(); 1064 return;
1084} 1065}
1085 1066
1086static void ocfs2_clear_inode(struct inode *inode) 1067static void ocfs2_clear_inode(struct inode *inode)
@@ -1088,11 +1069,9 @@ static void ocfs2_clear_inode(struct inode *inode)
1088 int status; 1069 int status;
1089 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1070 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1090 1071
1091 mlog_entry_void();
1092
1093 end_writeback(inode); 1072 end_writeback(inode);
1094 mlog(0, "Clearing inode: %llu, nlink = %u\n", 1073 trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
1095 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); 1074 inode->i_nlink);
1096 1075
1097 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, 1076 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
1098 "Inode=%lu\n", inode->i_ino); 1077 "Inode=%lu\n", inode->i_ino);
@@ -1181,8 +1160,6 @@ static void ocfs2_clear_inode(struct inode *inode)
1181 */ 1160 */
1182 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, 1161 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1183 &oi->ip_jinode); 1162 &oi->ip_jinode);
1184
1185 mlog_exit_void();
1186} 1163}
1187 1164
1188void ocfs2_evict_inode(struct inode *inode) 1165void ocfs2_evict_inode(struct inode *inode)
@@ -1204,17 +1181,14 @@ int ocfs2_drop_inode(struct inode *inode)
1204 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1181 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1205 int res; 1182 int res;
1206 1183
1207 mlog_entry_void(); 1184 trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
1208 1185 inode->i_nlink, oi->ip_flags);
1209 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
1210 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
1211 1186
1212 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) 1187 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1213 res = 1; 1188 res = 1;
1214 else 1189 else
1215 res = generic_drop_inode(inode); 1190 res = generic_drop_inode(inode);
1216 1191
1217 mlog_exit_void();
1218 return res; 1192 return res;
1219} 1193}
1220 1194
@@ -1226,11 +1200,11 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
1226 struct inode *inode = dentry->d_inode; 1200 struct inode *inode = dentry->d_inode;
1227 int status = 0; 1201 int status = 0;
1228 1202
1229 mlog_entry("(inode = 0x%p, ino = %llu)\n", inode, 1203 trace_ocfs2_inode_revalidate(inode,
1230 inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL); 1204 inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL,
1205 inode ? (unsigned long long)OCFS2_I(inode)->ip_flags : 0);
1231 1206
1232 if (!inode) { 1207 if (!inode) {
1233 mlog(0, "eep, no inode!\n");
1234 status = -ENOENT; 1208 status = -ENOENT;
1235 goto bail; 1209 goto bail;
1236 } 1210 }
@@ -1238,7 +1212,6 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
1238 spin_lock(&OCFS2_I(inode)->ip_lock); 1212 spin_lock(&OCFS2_I(inode)->ip_lock);
1239 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 1213 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
1240 spin_unlock(&OCFS2_I(inode)->ip_lock); 1214 spin_unlock(&OCFS2_I(inode)->ip_lock);
1241 mlog(0, "inode deleted!\n");
1242 status = -ENOENT; 1215 status = -ENOENT;
1243 goto bail; 1216 goto bail;
1244 } 1217 }
@@ -1254,8 +1227,6 @@ int ocfs2_inode_revalidate(struct dentry *dentry)
1254 } 1227 }
1255 ocfs2_inode_unlock(inode, 0); 1228 ocfs2_inode_unlock(inode, 0);
1256bail: 1229bail:
1257 mlog_exit(status);
1258
1259 return status; 1230 return status;
1260} 1231}
1261 1232
@@ -1271,8 +1242,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1271 int status; 1242 int status;
1272 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 1243 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
1273 1244
1274 mlog_entry("(inode %llu)\n", 1245 trace_ocfs2_mark_inode_dirty((unsigned long long)OCFS2_I(inode)->ip_blkno);
1275 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1276 1246
1277 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, 1247 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1278 OCFS2_JOURNAL_ACCESS_WRITE); 1248 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1302,7 +1272,6 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1302 1272
1303 ocfs2_journal_dirty(handle, bh); 1273 ocfs2_journal_dirty(handle, bh);
1304leave: 1274leave:
1305 mlog_exit(status);
1306 return status; 1275 return status;
1307} 1276}
1308 1277
@@ -1345,8 +1314,7 @@ int ocfs2_validate_inode_block(struct super_block *sb,
1345 int rc; 1314 int rc;
1346 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1315 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1347 1316
1348 mlog(0, "Validating dinode %llu\n", 1317 trace_ocfs2_validate_inode_block((unsigned long long)bh->b_blocknr);
1349 (unsigned long long)bh->b_blocknr);
1350 1318
1351 BUG_ON(!buffer_uptodate(bh)); 1319 BUG_ON(!buffer_uptodate(bh));
1352 1320
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7a4868196152..8f13c5989eae 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -9,7 +9,6 @@
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/compat.h> 10#include <linux/compat.h>
11 11
12#define MLOG_MASK_PREFIX ML_INODE
13#include <cluster/masklog.h> 12#include <cluster/masklog.h>
14 13
15#include "ocfs2.h" 14#include "ocfs2.h"
@@ -46,6 +45,22 @@ static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
46#define o2info_set_request_error(a, b) \ 45#define o2info_set_request_error(a, b) \
47 __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) 46 __o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
48 47
48static inline void __o2info_set_request_filled(struct ocfs2_info_request *req)
49{
50 req->ir_flags |= OCFS2_INFO_FL_FILLED;
51}
52
53#define o2info_set_request_filled(a) \
54 __o2info_set_request_filled((struct ocfs2_info_request *)&(a))
55
56static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req)
57{
58 req->ir_flags &= ~OCFS2_INFO_FL_FILLED;
59}
60
61#define o2info_clear_request_filled(a) \
62 __o2info_clear_request_filled((struct ocfs2_info_request *)&(a))
63
49static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) 64static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
50{ 65{
51 int status; 66 int status;
@@ -59,7 +74,6 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
59 *flags = OCFS2_I(inode)->ip_attr; 74 *flags = OCFS2_I(inode)->ip_attr;
60 ocfs2_inode_unlock(inode, 0); 75 ocfs2_inode_unlock(inode, 0);
61 76
62 mlog_exit(status);
63 return status; 77 return status;
64} 78}
65 79
@@ -82,7 +96,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
82 } 96 }
83 97
84 status = -EACCES; 98 status = -EACCES;
85 if (!is_owner_or_cap(inode)) 99 if (!inode_owner_or_capable(inode))
86 goto bail_unlock; 100 goto bail_unlock;
87 101
88 if (!S_ISDIR(inode->i_mode)) 102 if (!S_ISDIR(inode->i_mode))
@@ -125,7 +139,6 @@ bail:
125 139
126 brelse(bh); 140 brelse(bh);
127 141
128 mlog_exit(status);
129 return status; 142 return status;
130} 143}
131 144
@@ -139,7 +152,8 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
139 goto bail; 152 goto bail;
140 153
141 oib.ib_blocksize = inode->i_sb->s_blocksize; 154 oib.ib_blocksize = inode->i_sb->s_blocksize;
142 oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; 155
156 o2info_set_request_filled(oib);
143 157
144 if (o2info_to_user(oib, req)) 158 if (o2info_to_user(oib, req))
145 goto bail; 159 goto bail;
@@ -163,7 +177,8 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
163 goto bail; 177 goto bail;
164 178
165 oic.ic_clustersize = osb->s_clustersize; 179 oic.ic_clustersize = osb->s_clustersize;
166 oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; 180
181 o2info_set_request_filled(oic);
167 182
168 if (o2info_to_user(oic, req)) 183 if (o2info_to_user(oic, req))
169 goto bail; 184 goto bail;
@@ -187,7 +202,8 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
187 goto bail; 202 goto bail;
188 203
189 oim.im_max_slots = osb->max_slots; 204 oim.im_max_slots = osb->max_slots;
190 oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; 205
206 o2info_set_request_filled(oim);
191 207
192 if (o2info_to_user(oim, req)) 208 if (o2info_to_user(oim, req))
193 goto bail; 209 goto bail;
@@ -211,7 +227,8 @@ int ocfs2_info_handle_label(struct inode *inode,
211 goto bail; 227 goto bail;
212 228
213 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); 229 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
214 oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; 230
231 o2info_set_request_filled(oil);
215 232
216 if (o2info_to_user(oil, req)) 233 if (o2info_to_user(oil, req))
217 goto bail; 234 goto bail;
@@ -235,7 +252,8 @@ int ocfs2_info_handle_uuid(struct inode *inode,
235 goto bail; 252 goto bail;
236 253
237 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); 254 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
238 oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; 255
256 o2info_set_request_filled(oiu);
239 257
240 if (o2info_to_user(oiu, req)) 258 if (o2info_to_user(oiu, req))
241 goto bail; 259 goto bail;
@@ -261,7 +279,8 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
261 oif.if_compat_features = osb->s_feature_compat; 279 oif.if_compat_features = osb->s_feature_compat;
262 oif.if_incompat_features = osb->s_feature_incompat; 280 oif.if_incompat_features = osb->s_feature_incompat;
263 oif.if_ro_compat_features = osb->s_feature_ro_compat; 281 oif.if_ro_compat_features = osb->s_feature_ro_compat;
264 oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; 282
283 o2info_set_request_filled(oif);
265 284
266 if (o2info_to_user(oif, req)) 285 if (o2info_to_user(oif, req))
267 goto bail; 286 goto bail;
@@ -286,7 +305,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
286 305
287 oij.ij_journal_size = osb->journal->j_inode->i_size; 306 oij.ij_journal_size = osb->journal->j_inode->i_size;
288 307
289 oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; 308 o2info_set_request_filled(oij);
290 309
291 if (o2info_to_user(oij, req)) 310 if (o2info_to_user(oij, req))
292 goto bail; 311 goto bail;
@@ -308,7 +327,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
308 if (o2info_from_user(oir, req)) 327 if (o2info_from_user(oir, req))
309 goto bail; 328 goto bail;
310 329
311 oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; 330 o2info_clear_request_filled(oir);
312 331
313 if (o2info_to_user(oir, req)) 332 if (o2info_to_user(oir, req))
314 goto bail; 333 goto bail;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index faa2303dbf0a..dcc2d9327150 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -31,7 +31,6 @@
31#include <linux/time.h> 31#include <linux/time.h>
32#include <linux/random.h> 32#include <linux/random.h>
33 33
34#define MLOG_MASK_PREFIX ML_JOURNAL
35#include <cluster/masklog.h> 34#include <cluster/masklog.h>
36 35
37#include "ocfs2.h" 36#include "ocfs2.h"
@@ -52,6 +51,7 @@
52#include "quota.h" 51#include "quota.h"
53 52
54#include "buffer_head_io.h" 53#include "buffer_head_io.h"
54#include "ocfs2_trace.h"
55 55
56DEFINE_SPINLOCK(trans_inc_lock); 56DEFINE_SPINLOCK(trans_inc_lock);
57 57
@@ -303,16 +303,15 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
303 unsigned int flushed; 303 unsigned int flushed;
304 struct ocfs2_journal *journal = NULL; 304 struct ocfs2_journal *journal = NULL;
305 305
306 mlog_entry_void();
307
308 journal = osb->journal; 306 journal = osb->journal;
309 307
310 /* Flush all pending commits and checkpoint the journal. */ 308 /* Flush all pending commits and checkpoint the journal. */
311 down_write(&journal->j_trans_barrier); 309 down_write(&journal->j_trans_barrier);
312 310
313 if (atomic_read(&journal->j_num_trans) == 0) { 311 flushed = atomic_read(&journal->j_num_trans);
312 trace_ocfs2_commit_cache_begin(flushed);
313 if (flushed == 0) {
314 up_write(&journal->j_trans_barrier); 314 up_write(&journal->j_trans_barrier);
315 mlog(0, "No transactions for me to flush!\n");
316 goto finally; 315 goto finally;
317 } 316 }
318 317
@@ -331,13 +330,11 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
331 atomic_set(&journal->j_num_trans, 0); 330 atomic_set(&journal->j_num_trans, 0);
332 up_write(&journal->j_trans_barrier); 331 up_write(&journal->j_trans_barrier);
333 332
334 mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", 333 trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed);
335 journal->j_trans_id, flushed);
336 334
337 ocfs2_wake_downconvert_thread(osb); 335 ocfs2_wake_downconvert_thread(osb);
338 wake_up(&journal->j_checkpointed); 336 wake_up(&journal->j_checkpointed);
339finally: 337finally:
340 mlog_exit(status);
341 return status; 338 return status;
342} 339}
343 340
@@ -425,9 +422,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
425 return 0; 422 return 0;
426 423
427 old_nblocks = handle->h_buffer_credits; 424 old_nblocks = handle->h_buffer_credits;
428 mlog_entry_void();
429 425
430 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); 426 trace_ocfs2_extend_trans(old_nblocks, nblocks);
431 427
432#ifdef CONFIG_OCFS2_DEBUG_FS 428#ifdef CONFIG_OCFS2_DEBUG_FS
433 status = 1; 429 status = 1;
@@ -440,9 +436,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
440#endif 436#endif
441 437
442 if (status > 0) { 438 if (status > 0) {
443 mlog(0, 439 trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
444 "jbd2_journal_extend failed, trying "
445 "jbd2_journal_restart\n");
446 status = jbd2_journal_restart(handle, 440 status = jbd2_journal_restart(handle,
447 old_nblocks + nblocks); 441 old_nblocks + nblocks);
448 if (status < 0) { 442 if (status < 0) {
@@ -453,8 +447,6 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
453 447
454 status = 0; 448 status = 0;
455bail: 449bail:
456
457 mlog_exit(status);
458 return status; 450 return status;
459} 451}
460 452
@@ -622,12 +614,9 @@ static int __ocfs2_journal_access(handle_t *handle,
622 BUG_ON(!handle); 614 BUG_ON(!handle);
623 BUG_ON(!bh); 615 BUG_ON(!bh);
624 616
625 mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", 617 trace_ocfs2_journal_access(
626 (unsigned long long)bh->b_blocknr, type, 618 (unsigned long long)ocfs2_metadata_cache_owner(ci),
627 (type == OCFS2_JOURNAL_ACCESS_CREATE) ? 619 (unsigned long long)bh->b_blocknr, type, bh->b_size);
628 "OCFS2_JOURNAL_ACCESS_CREATE" :
629 "OCFS2_JOURNAL_ACCESS_WRITE",
630 bh->b_size);
631 620
632 /* we can safely remove this assertion after testing. */ 621 /* we can safely remove this assertion after testing. */
633 if (!buffer_uptodate(bh)) { 622 if (!buffer_uptodate(bh)) {
@@ -668,7 +657,6 @@ static int __ocfs2_journal_access(handle_t *handle,
668 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 657 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
669 status, type); 658 status, type);
670 659
671 mlog_exit(status);
672 return status; 660 return status;
673} 661}
674 662
@@ -737,13 +725,10 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
737{ 725{
738 int status; 726 int status;
739 727
740 mlog_entry("(bh->b_blocknr=%llu)\n", 728 trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr);
741 (unsigned long long)bh->b_blocknr);
742 729
743 status = jbd2_journal_dirty_metadata(handle, bh); 730 status = jbd2_journal_dirty_metadata(handle, bh);
744 BUG_ON(status); 731 BUG_ON(status);
745
746 mlog_exit_void();
747} 732}
748 733
749#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 734#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
@@ -775,8 +760,6 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
775 struct ocfs2_super *osb; 760 struct ocfs2_super *osb;
776 int inode_lock = 0; 761 int inode_lock = 0;
777 762
778 mlog_entry_void();
779
780 BUG_ON(!journal); 763 BUG_ON(!journal);
781 764
782 osb = journal->j_osb; 765 osb = journal->j_osb;
@@ -820,10 +803,9 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
820 goto done; 803 goto done;
821 } 804 }
822 805
823 mlog(0, "inode->i_size = %lld\n", inode->i_size); 806 trace_ocfs2_journal_init(inode->i_size,
824 mlog(0, "inode->i_blocks = %llu\n", 807 (unsigned long long)inode->i_blocks,
825 (unsigned long long)inode->i_blocks); 808 OCFS2_I(inode)->ip_clusters);
826 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
827 809
828 /* call the kernels journal init function now */ 810 /* call the kernels journal init function now */
829 j_journal = jbd2_journal_init_inode(inode); 811 j_journal = jbd2_journal_init_inode(inode);
@@ -833,8 +815,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
833 goto done; 815 goto done;
834 } 816 }
835 817
836 mlog(0, "Returned from jbd2_journal_init_inode\n"); 818 trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
837 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
838 819
839 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & 820 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
840 OCFS2_JOURNAL_DIRTY_FL); 821 OCFS2_JOURNAL_DIRTY_FL);
@@ -859,7 +840,6 @@ done:
859 } 840 }
860 } 841 }
861 842
862 mlog_exit(status);
863 return status; 843 return status;
864} 844}
865 845
@@ -882,8 +862,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
882 struct buffer_head *bh = journal->j_bh; 862 struct buffer_head *bh = journal->j_bh;
883 struct ocfs2_dinode *fe; 863 struct ocfs2_dinode *fe;
884 864
885 mlog_entry_void();
886
887 fe = (struct ocfs2_dinode *)bh->b_data; 865 fe = (struct ocfs2_dinode *)bh->b_data;
888 866
889 /* The journal bh on the osb always comes from ocfs2_journal_init() 867 /* The journal bh on the osb always comes from ocfs2_journal_init()
@@ -906,7 +884,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
906 if (status < 0) 884 if (status < 0)
907 mlog_errno(status); 885 mlog_errno(status);
908 886
909 mlog_exit(status);
910 return status; 887 return status;
911} 888}
912 889
@@ -921,8 +898,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
921 struct inode *inode = NULL; 898 struct inode *inode = NULL;
922 int num_running_trans = 0; 899 int num_running_trans = 0;
923 900
924 mlog_entry_void();
925
926 BUG_ON(!osb); 901 BUG_ON(!osb);
927 902
928 journal = osb->journal; 903 journal = osb->journal;
@@ -939,10 +914,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
939 BUG(); 914 BUG();
940 915
941 num_running_trans = atomic_read(&(osb->journal->j_num_trans)); 916 num_running_trans = atomic_read(&(osb->journal->j_num_trans));
942 if (num_running_trans > 0) 917 trace_ocfs2_journal_shutdown(num_running_trans);
943 mlog(0, "Shutting down journal: must wait on %d "
944 "running transactions!\n",
945 num_running_trans);
946 918
947 /* Do a commit_cache here. It will flush our journal, *and* 919 /* Do a commit_cache here. It will flush our journal, *and*
948 * release any locks that are still held. 920 * release any locks that are still held.
@@ -955,7 +927,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
955 * completely destroy the journal. */ 927 * completely destroy the journal. */
956 if (osb->commit_task) { 928 if (osb->commit_task) {
957 /* Wait for the commit thread */ 929 /* Wait for the commit thread */
958 mlog(0, "Waiting for ocfs2commit to exit....\n"); 930 trace_ocfs2_journal_shutdown_wait(osb->commit_task);
959 kthread_stop(osb->commit_task); 931 kthread_stop(osb->commit_task);
960 osb->commit_task = NULL; 932 osb->commit_task = NULL;
961 } 933 }
@@ -998,7 +970,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
998done: 970done:
999 if (inode) 971 if (inode)
1000 iput(inode); 972 iput(inode);
1001 mlog_exit_void();
1002} 973}
1003 974
1004static void ocfs2_clear_journal_error(struct super_block *sb, 975static void ocfs2_clear_journal_error(struct super_block *sb,
@@ -1024,8 +995,6 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1024 int status = 0; 995 int status = 0;
1025 struct ocfs2_super *osb; 996 struct ocfs2_super *osb;
1026 997
1027 mlog_entry_void();
1028
1029 BUG_ON(!journal); 998 BUG_ON(!journal);
1030 999
1031 osb = journal->j_osb; 1000 osb = journal->j_osb;
@@ -1059,7 +1028,6 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
1059 osb->commit_task = NULL; 1028 osb->commit_task = NULL;
1060 1029
1061done: 1030done:
1062 mlog_exit(status);
1063 return status; 1031 return status;
1064} 1032}
1065 1033
@@ -1070,8 +1038,6 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1070{ 1038{
1071 int status; 1039 int status;
1072 1040
1073 mlog_entry_void();
1074
1075 BUG_ON(!journal); 1041 BUG_ON(!journal);
1076 1042
1077 status = jbd2_journal_wipe(journal->j_journal, full); 1043 status = jbd2_journal_wipe(journal->j_journal, full);
@@ -1085,7 +1051,6 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
1085 mlog_errno(status); 1051 mlog_errno(status);
1086 1052
1087bail: 1053bail:
1088 mlog_exit(status);
1089 return status; 1054 return status;
1090} 1055}
1091 1056
@@ -1124,8 +1089,6 @@ static int ocfs2_force_read_journal(struct inode *inode)
1124#define CONCURRENT_JOURNAL_FILL 32ULL 1089#define CONCURRENT_JOURNAL_FILL 32ULL
1125 struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; 1090 struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];
1126 1091
1127 mlog_entry_void();
1128
1129 memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); 1092 memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
1130 1093
1131 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); 1094 num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
@@ -1161,7 +1124,6 @@ static int ocfs2_force_read_journal(struct inode *inode)
1161bail: 1124bail:
1162 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) 1125 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
1163 brelse(bhs[i]); 1126 brelse(bhs[i]);
1164 mlog_exit(status);
1165 return status; 1127 return status;
1166} 1128}
1167 1129
@@ -1185,7 +1147,7 @@ struct ocfs2_la_recovery_item {
1185 */ 1147 */
1186void ocfs2_complete_recovery(struct work_struct *work) 1148void ocfs2_complete_recovery(struct work_struct *work)
1187{ 1149{
1188 int ret; 1150 int ret = 0;
1189 struct ocfs2_journal *journal = 1151 struct ocfs2_journal *journal =
1190 container_of(work, struct ocfs2_journal, j_recovery_work); 1152 container_of(work, struct ocfs2_journal, j_recovery_work);
1191 struct ocfs2_super *osb = journal->j_osb; 1153 struct ocfs2_super *osb = journal->j_osb;
@@ -1194,9 +1156,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
1194 struct ocfs2_quota_recovery *qrec; 1156 struct ocfs2_quota_recovery *qrec;
1195 LIST_HEAD(tmp_la_list); 1157 LIST_HEAD(tmp_la_list);
1196 1158
1197 mlog_entry_void(); 1159 trace_ocfs2_complete_recovery(
1198 1160 (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno);
1199 mlog(0, "completing recovery from keventd\n");
1200 1161
1201 spin_lock(&journal->j_lock); 1162 spin_lock(&journal->j_lock);
1202 list_splice_init(&journal->j_la_cleanups, &tmp_la_list); 1163 list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
@@ -1205,15 +1166,18 @@ void ocfs2_complete_recovery(struct work_struct *work)
1205 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { 1166 list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
1206 list_del_init(&item->lri_list); 1167 list_del_init(&item->lri_list);
1207 1168
1208 mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
1209
1210 ocfs2_wait_on_quotas(osb); 1169 ocfs2_wait_on_quotas(osb);
1211 1170
1212 la_dinode = item->lri_la_dinode; 1171 la_dinode = item->lri_la_dinode;
1213 if (la_dinode) { 1172 tl_dinode = item->lri_tl_dinode;
1214 mlog(0, "Clean up local alloc %llu\n", 1173 qrec = item->lri_qrec;
1215 (unsigned long long)le64_to_cpu(la_dinode->i_blkno)); 1174
1175 trace_ocfs2_complete_recovery_slot(item->lri_slot,
1176 la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0,
1177 tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0,
1178 qrec);
1216 1179
1180 if (la_dinode) {
1217 ret = ocfs2_complete_local_alloc_recovery(osb, 1181 ret = ocfs2_complete_local_alloc_recovery(osb,
1218 la_dinode); 1182 la_dinode);
1219 if (ret < 0) 1183 if (ret < 0)
@@ -1222,11 +1186,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1222 kfree(la_dinode); 1186 kfree(la_dinode);
1223 } 1187 }
1224 1188
1225 tl_dinode = item->lri_tl_dinode;
1226 if (tl_dinode) { 1189 if (tl_dinode) {
1227 mlog(0, "Clean up truncate log %llu\n",
1228 (unsigned long long)le64_to_cpu(tl_dinode->i_blkno));
1229
1230 ret = ocfs2_complete_truncate_log_recovery(osb, 1190 ret = ocfs2_complete_truncate_log_recovery(osb,
1231 tl_dinode); 1191 tl_dinode);
1232 if (ret < 0) 1192 if (ret < 0)
@@ -1239,9 +1199,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1239 if (ret < 0) 1199 if (ret < 0)
1240 mlog_errno(ret); 1200 mlog_errno(ret);
1241 1201
1242 qrec = item->lri_qrec;
1243 if (qrec) { 1202 if (qrec) {
1244 mlog(0, "Recovering quota files");
1245 ret = ocfs2_finish_quota_recovery(osb, qrec, 1203 ret = ocfs2_finish_quota_recovery(osb, qrec,
1246 item->lri_slot); 1204 item->lri_slot);
1247 if (ret < 0) 1205 if (ret < 0)
@@ -1252,8 +1210,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
1252 kfree(item); 1210 kfree(item);
1253 } 1211 }
1254 1212
1255 mlog(0, "Recovery completion\n"); 1213 trace_ocfs2_complete_recovery_end(ret);
1256 mlog_exit_void();
1257} 1214}
1258 1215
1259/* NOTE: This function always eats your references to la_dinode and 1216/* NOTE: This function always eats your references to la_dinode and
@@ -1339,8 +1296,6 @@ static int __ocfs2_recovery_thread(void *arg)
1339 int rm_quota_used = 0, i; 1296 int rm_quota_used = 0, i;
1340 struct ocfs2_quota_recovery *qrec; 1297 struct ocfs2_quota_recovery *qrec;
1341 1298
1342 mlog_entry_void();
1343
1344 status = ocfs2_wait_on_mount(osb); 1299 status = ocfs2_wait_on_mount(osb);
1345 if (status < 0) { 1300 if (status < 0) {
1346 goto bail; 1301 goto bail;
@@ -1372,15 +1327,12 @@ restart:
1372 * clear it until ocfs2_recover_node() has succeeded. */ 1327 * clear it until ocfs2_recover_node() has succeeded. */
1373 node_num = rm->rm_entries[0]; 1328 node_num = rm->rm_entries[0];
1374 spin_unlock(&osb->osb_lock); 1329 spin_unlock(&osb->osb_lock);
1375 mlog(0, "checking node %d\n", node_num);
1376 slot_num = ocfs2_node_num_to_slot(osb, node_num); 1330 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1331 trace_ocfs2_recovery_thread_node(node_num, slot_num);
1377 if (slot_num == -ENOENT) { 1332 if (slot_num == -ENOENT) {
1378 status = 0; 1333 status = 0;
1379 mlog(0, "no slot for this node, so no recovery"
1380 "required.\n");
1381 goto skip_recovery; 1334 goto skip_recovery;
1382 } 1335 }
1383 mlog(0, "node %d was using slot %d\n", node_num, slot_num);
1384 1336
1385 /* It is a bit subtle with quota recovery. We cannot do it 1337 /* It is a bit subtle with quota recovery. We cannot do it
1386 * immediately because we have to obtain cluster locks from 1338 * immediately because we have to obtain cluster locks from
@@ -1407,7 +1359,7 @@ skip_recovery:
1407 spin_lock(&osb->osb_lock); 1359 spin_lock(&osb->osb_lock);
1408 } 1360 }
1409 spin_unlock(&osb->osb_lock); 1361 spin_unlock(&osb->osb_lock);
1410 mlog(0, "All nodes recovered\n"); 1362 trace_ocfs2_recovery_thread_end(status);
1411 1363
1412 /* Refresh all journal recovery generations from disk */ 1364 /* Refresh all journal recovery generations from disk */
1413 status = ocfs2_check_journals_nolocks(osb); 1365 status = ocfs2_check_journals_nolocks(osb);
@@ -1451,7 +1403,6 @@ bail:
1451 if (rm_quota) 1403 if (rm_quota)
1452 kfree(rm_quota); 1404 kfree(rm_quota);
1453 1405
1454 mlog_exit(status);
1455 /* no one is callint kthread_stop() for us so the kthread() api 1406 /* no one is callint kthread_stop() for us so the kthread() api
1456 * requires that we call do_exit(). And it isn't exported, but 1407 * requires that we call do_exit(). And it isn't exported, but
1457 * complete_and_exit() seems to be a minimal wrapper around it. */ 1408 * complete_and_exit() seems to be a minimal wrapper around it. */
@@ -1461,19 +1412,15 @@ bail:
1461 1412
1462void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) 1413void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1463{ 1414{
1464 mlog_entry("(node_num=%d, osb->node_num = %d)\n",
1465 node_num, osb->node_num);
1466
1467 mutex_lock(&osb->recovery_lock); 1415 mutex_lock(&osb->recovery_lock);
1468 if (osb->disable_recovery)
1469 goto out;
1470 1416
1471 /* People waiting on recovery will wait on 1417 trace_ocfs2_recovery_thread(node_num, osb->node_num,
1472 * the recovery map to empty. */ 1418 osb->disable_recovery, osb->recovery_thread_task,
1473 if (ocfs2_recovery_map_set(osb, node_num)) 1419 osb->disable_recovery ?
1474 mlog(0, "node %d already in recovery map.\n", node_num); 1420 -1 : ocfs2_recovery_map_set(osb, node_num));
1475 1421
1476 mlog(0, "starting recovery thread...\n"); 1422 if (osb->disable_recovery)
1423 goto out;
1477 1424
1478 if (osb->recovery_thread_task) 1425 if (osb->recovery_thread_task)
1479 goto out; 1426 goto out;
@@ -1488,8 +1435,6 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1488out: 1435out:
1489 mutex_unlock(&osb->recovery_lock); 1436 mutex_unlock(&osb->recovery_lock);
1490 wake_up(&osb->recovery_event); 1437 wake_up(&osb->recovery_event);
1491
1492 mlog_exit_void();
1493} 1438}
1494 1439
1495static int ocfs2_read_journal_inode(struct ocfs2_super *osb, 1440static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
@@ -1563,7 +1508,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1563 * If not, it needs recovery. 1508 * If not, it needs recovery.
1564 */ 1509 */
1565 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) { 1510 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1566 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num, 1511 trace_ocfs2_replay_journal_recovered(slot_num,
1567 osb->slot_recovery_generations[slot_num], slot_reco_gen); 1512 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1568 osb->slot_recovery_generations[slot_num] = slot_reco_gen; 1513 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1569 status = -EBUSY; 1514 status = -EBUSY;
@@ -1574,7 +1519,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1574 1519
1575 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1520 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1576 if (status < 0) { 1521 if (status < 0) {
1577 mlog(0, "status returned from ocfs2_inode_lock=%d\n", status); 1522 trace_ocfs2_replay_journal_lock_err(status);
1578 if (status != -ERESTARTSYS) 1523 if (status != -ERESTARTSYS)
1579 mlog(ML_ERROR, "Could not lock journal!\n"); 1524 mlog(ML_ERROR, "Could not lock journal!\n");
1580 goto done; 1525 goto done;
@@ -1587,7 +1532,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1587 slot_reco_gen = ocfs2_get_recovery_generation(fe); 1532 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1588 1533
1589 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1534 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1590 mlog(0, "No recovery required for node %d\n", node_num); 1535 trace_ocfs2_replay_journal_skip(node_num);
1591 /* Refresh recovery generation for the slot */ 1536 /* Refresh recovery generation for the slot */
1592 osb->slot_recovery_generations[slot_num] = slot_reco_gen; 1537 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1593 goto done; 1538 goto done;
@@ -1608,7 +1553,6 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1608 goto done; 1553 goto done;
1609 } 1554 }
1610 1555
1611 mlog(0, "calling journal_init_inode\n");
1612 journal = jbd2_journal_init_inode(inode); 1556 journal = jbd2_journal_init_inode(inode);
1613 if (journal == NULL) { 1557 if (journal == NULL) {
1614 mlog(ML_ERROR, "Linux journal layer error\n"); 1558 mlog(ML_ERROR, "Linux journal layer error\n");
@@ -1628,7 +1572,6 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1628 ocfs2_clear_journal_error(osb->sb, journal, slot_num); 1572 ocfs2_clear_journal_error(osb->sb, journal, slot_num);
1629 1573
1630 /* wipe the journal */ 1574 /* wipe the journal */
1631 mlog(0, "flushing the journal.\n");
1632 jbd2_journal_lock_updates(journal); 1575 jbd2_journal_lock_updates(journal);
1633 status = jbd2_journal_flush(journal); 1576 status = jbd2_journal_flush(journal);
1634 jbd2_journal_unlock_updates(journal); 1577 jbd2_journal_unlock_updates(journal);
@@ -1665,7 +1608,6 @@ done:
1665 1608
1666 brelse(bh); 1609 brelse(bh);
1667 1610
1668 mlog_exit(status);
1669 return status; 1611 return status;
1670} 1612}
1671 1613
@@ -1688,8 +1630,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1688 struct ocfs2_dinode *la_copy = NULL; 1630 struct ocfs2_dinode *la_copy = NULL;
1689 struct ocfs2_dinode *tl_copy = NULL; 1631 struct ocfs2_dinode *tl_copy = NULL;
1690 1632
1691 mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n", 1633 trace_ocfs2_recover_node(node_num, slot_num, osb->node_num);
1692 node_num, slot_num, osb->node_num);
1693 1634
1694 /* Should not ever be called to recover ourselves -- in that 1635 /* Should not ever be called to recover ourselves -- in that
1695 * case we should've called ocfs2_journal_load instead. */ 1636 * case we should've called ocfs2_journal_load instead. */
@@ -1698,9 +1639,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1698 status = ocfs2_replay_journal(osb, node_num, slot_num); 1639 status = ocfs2_replay_journal(osb, node_num, slot_num);
1699 if (status < 0) { 1640 if (status < 0) {
1700 if (status == -EBUSY) { 1641 if (status == -EBUSY) {
1701 mlog(0, "Skipping recovery for slot %u (node %u) " 1642 trace_ocfs2_recover_node_skip(slot_num, node_num);
1702 "as another node has recovered it\n", slot_num,
1703 node_num);
1704 status = 0; 1643 status = 0;
1705 goto done; 1644 goto done;
1706 } 1645 }
@@ -1735,7 +1674,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1735 status = 0; 1674 status = 0;
1736done: 1675done:
1737 1676
1738 mlog_exit(status);
1739 return status; 1677 return status;
1740} 1678}
1741 1679
@@ -1808,8 +1746,8 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1808 spin_lock(&osb->osb_lock); 1746 spin_lock(&osb->osb_lock);
1809 osb->slot_recovery_generations[i] = gen; 1747 osb->slot_recovery_generations[i] = gen;
1810 1748
1811 mlog(0, "Slot %u recovery generation is %u\n", i, 1749 trace_ocfs2_mark_dead_nodes(i,
1812 osb->slot_recovery_generations[i]); 1750 osb->slot_recovery_generations[i]);
1813 1751
1814 if (i == osb->slot_num) { 1752 if (i == osb->slot_num) {
1815 spin_unlock(&osb->osb_lock); 1753 spin_unlock(&osb->osb_lock);
@@ -1845,7 +1783,6 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1845 1783
1846 status = 0; 1784 status = 0;
1847bail: 1785bail:
1848 mlog_exit(status);
1849 return status; 1786 return status;
1850} 1787}
1851 1788
@@ -1884,11 +1821,12 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1884 1821
1885 os = &osb->osb_orphan_scan; 1822 os = &osb->osb_orphan_scan;
1886 1823
1887 mlog(0, "Begin orphan scan\n");
1888
1889 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) 1824 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1890 goto out; 1825 goto out;
1891 1826
1827 trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno,
1828 atomic_read(&os->os_state));
1829
1892 status = ocfs2_orphan_scan_lock(osb, &seqno); 1830 status = ocfs2_orphan_scan_lock(osb, &seqno);
1893 if (status < 0) { 1831 if (status < 0) {
1894 if (status != -EAGAIN) 1832 if (status != -EAGAIN)
@@ -1918,7 +1856,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1918unlock: 1856unlock:
1919 ocfs2_orphan_scan_unlock(osb, seqno); 1857 ocfs2_orphan_scan_unlock(osb, seqno);
1920out: 1858out:
1921 mlog(0, "Orphan scan completed\n"); 1859 trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno,
1860 atomic_read(&os->os_state));
1922 return; 1861 return;
1923} 1862}
1924 1863
@@ -2002,8 +1941,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
2002 if (IS_ERR(iter)) 1941 if (IS_ERR(iter))
2003 return 0; 1942 return 0;
2004 1943
2005 mlog(0, "queue orphan %llu\n", 1944 trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno);
2006 (unsigned long long)OCFS2_I(iter)->ip_blkno);
2007 /* No locking is required for the next_orphan queue as there 1945 /* No locking is required for the next_orphan queue as there
2008 * is only ever a single process doing orphan recovery. */ 1946 * is only ever a single process doing orphan recovery. */
2009 OCFS2_I(iter)->ip_next_orphan = p->head; 1947 OCFS2_I(iter)->ip_next_orphan = p->head;
@@ -2119,7 +2057,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2119 struct inode *iter; 2057 struct inode *iter;
2120 struct ocfs2_inode_info *oi; 2058 struct ocfs2_inode_info *oi;
2121 2059
2122 mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); 2060 trace_ocfs2_recover_orphans(slot);
2123 2061
2124 ocfs2_mark_recovering_orphan_dir(osb, slot); 2062 ocfs2_mark_recovering_orphan_dir(osb, slot);
2125 ret = ocfs2_queue_orphans(osb, slot, &inode); 2063 ret = ocfs2_queue_orphans(osb, slot, &inode);
@@ -2132,7 +2070,8 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
2132 2070
2133 while (inode) { 2071 while (inode) {
2134 oi = OCFS2_I(inode); 2072 oi = OCFS2_I(inode);
2135 mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); 2073 trace_ocfs2_recover_orphans_iput(
2074 (unsigned long long)oi->ip_blkno);
2136 2075
2137 iter = oi->ip_next_orphan; 2076 iter = oi->ip_next_orphan;
2138 2077
@@ -2170,6 +2109,7 @@ static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
2170 * MOUNTED flag, but this is set right before 2109 * MOUNTED flag, but this is set right before
2171 * dismount_volume() so we can trust it. */ 2110 * dismount_volume() so we can trust it. */
2172 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) { 2111 if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) {
2112 trace_ocfs2_wait_on_mount(VOLUME_DISABLED);
2173 mlog(0, "mount error, exiting!\n"); 2113 mlog(0, "mount error, exiting!\n");
2174 return -EBUSY; 2114 return -EBUSY;
2175 } 2115 }
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ec6adbf8f551..210c35237548 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -29,7 +29,6 @@
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31 31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h> 32#include <cluster/masklog.h>
34 33
35#include "ocfs2.h" 34#include "ocfs2.h"
@@ -43,6 +42,7 @@
43#include "suballoc.h" 42#include "suballoc.h"
44#include "super.h" 43#include "super.h"
45#include "sysfile.h" 44#include "sysfile.h"
45#include "ocfs2_trace.h"
46 46
47#include "buffer_head_io.h" 47#include "buffer_head_io.h"
48 48
@@ -201,8 +201,7 @@ void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
201 la_max_mb = ocfs2_clusters_to_megabytes(sb, 201 la_max_mb = ocfs2_clusters_to_megabytes(sb,
202 ocfs2_local_alloc_size(sb) * 8); 202 ocfs2_local_alloc_size(sb) * 8);
203 203
204 mlog(0, "requested: %dM, max: %uM, default: %uM\n", 204 trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb);
205 requested_mb, la_max_mb, la_default_mb);
206 205
207 if (requested_mb == -1) { 206 if (requested_mb == -1) {
208 /* No user request - use defaults */ 207 /* No user request - use defaults */
@@ -276,8 +275,8 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
276 275
277 ret = 1; 276 ret = 1;
278bail: 277bail:
279 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 278 trace_ocfs2_alloc_should_use_local(
280 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 279 (unsigned long long)bits, osb->local_alloc_state, la_bits, ret);
281 spin_unlock(&osb->osb_lock); 280 spin_unlock(&osb->osb_lock);
282 return ret; 281 return ret;
283} 282}
@@ -291,8 +290,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
291 struct inode *inode = NULL; 290 struct inode *inode = NULL;
292 struct ocfs2_local_alloc *la; 291 struct ocfs2_local_alloc *la;
293 292
294 mlog_entry_void();
295
296 if (osb->local_alloc_bits == 0) 293 if (osb->local_alloc_bits == 0)
297 goto bail; 294 goto bail;
298 295
@@ -364,9 +361,10 @@ bail:
364 if (inode) 361 if (inode)
365 iput(inode); 362 iput(inode);
366 363
367 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); 364 trace_ocfs2_load_local_alloc(osb->local_alloc_bits);
368 365
369 mlog_exit(status); 366 if (status)
367 mlog_errno(status);
370 return status; 368 return status;
371} 369}
372 370
@@ -388,8 +386,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
388 struct ocfs2_dinode *alloc_copy = NULL; 386 struct ocfs2_dinode *alloc_copy = NULL;
389 struct ocfs2_dinode *alloc = NULL; 387 struct ocfs2_dinode *alloc = NULL;
390 388
391 mlog_entry_void();
392
393 cancel_delayed_work(&osb->la_enable_wq); 389 cancel_delayed_work(&osb->la_enable_wq);
394 flush_workqueue(ocfs2_wq); 390 flush_workqueue(ocfs2_wq);
395 391
@@ -482,8 +478,6 @@ out:
482 478
483 if (alloc_copy) 479 if (alloc_copy)
484 kfree(alloc_copy); 480 kfree(alloc_copy);
485
486 mlog_exit_void();
487} 481}
488 482
489/* 483/*
@@ -502,7 +496,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
502 struct inode *inode = NULL; 496 struct inode *inode = NULL;
503 struct ocfs2_dinode *alloc; 497 struct ocfs2_dinode *alloc;
504 498
505 mlog_entry("(slot_num = %d)\n", slot_num); 499 trace_ocfs2_begin_local_alloc_recovery(slot_num);
506 500
507 *alloc_copy = NULL; 501 *alloc_copy = NULL;
508 502
@@ -552,7 +546,8 @@ bail:
552 iput(inode); 546 iput(inode);
553 } 547 }
554 548
555 mlog_exit(status); 549 if (status)
550 mlog_errno(status);
556 return status; 551 return status;
557} 552}
558 553
@@ -570,8 +565,6 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
570 struct buffer_head *main_bm_bh = NULL; 565 struct buffer_head *main_bm_bh = NULL;
571 struct inode *main_bm_inode; 566 struct inode *main_bm_inode;
572 567
573 mlog_entry_void();
574
575 main_bm_inode = ocfs2_get_system_file_inode(osb, 568 main_bm_inode = ocfs2_get_system_file_inode(osb,
576 GLOBAL_BITMAP_SYSTEM_INODE, 569 GLOBAL_BITMAP_SYSTEM_INODE,
577 OCFS2_INVALID_SLOT); 570 OCFS2_INVALID_SLOT);
@@ -620,7 +613,8 @@ out_mutex:
620out: 613out:
621 if (!status) 614 if (!status)
622 ocfs2_init_steal_slots(osb); 615 ocfs2_init_steal_slots(osb);
623 mlog_exit(status); 616 if (status)
617 mlog_errno(status);
624 return status; 618 return status;
625} 619}
626 620
@@ -640,8 +634,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
640 struct inode *local_alloc_inode; 634 struct inode *local_alloc_inode;
641 unsigned int free_bits; 635 unsigned int free_bits;
642 636
643 mlog_entry_void();
644
645 BUG_ON(!ac); 637 BUG_ON(!ac);
646 638
647 local_alloc_inode = 639 local_alloc_inode =
@@ -712,10 +704,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
712 goto bail; 704 goto bail;
713 } 705 }
714 706
715 if (ac->ac_max_block)
716 mlog(0, "Calling in_range for max block %llu\n",
717 (unsigned long long)ac->ac_max_block);
718
719 ac->ac_inode = local_alloc_inode; 707 ac->ac_inode = local_alloc_inode;
720 /* We should never use localalloc from another slot */ 708 /* We should never use localalloc from another slot */
721 ac->ac_alloc_slot = osb->slot_num; 709 ac->ac_alloc_slot = osb->slot_num;
@@ -729,10 +717,12 @@ bail:
729 iput(local_alloc_inode); 717 iput(local_alloc_inode);
730 } 718 }
731 719
732 mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, 720 trace_ocfs2_reserve_local_alloc_bits(
733 status); 721 (unsigned long long)ac->ac_max_block,
722 bits_wanted, osb->slot_num, status);
734 723
735 mlog_exit(status); 724 if (status)
725 mlog_errno(status);
736 return status; 726 return status;
737} 727}
738 728
@@ -749,7 +739,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
749 struct ocfs2_dinode *alloc; 739 struct ocfs2_dinode *alloc;
750 struct ocfs2_local_alloc *la; 740 struct ocfs2_local_alloc *la;
751 741
752 mlog_entry_void();
753 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 742 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
754 743
755 local_alloc_inode = ac->ac_inode; 744 local_alloc_inode = ac->ac_inode;
@@ -788,7 +777,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
788 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 777 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
789 778
790bail: 779bail:
791 mlog_exit(status); 780 if (status)
781 mlog_errno(status);
792 return status; 782 return status;
793} 783}
794 784
@@ -799,13 +789,11 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
799 u32 count = 0; 789 u32 count = 0;
800 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 790 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
801 791
802 mlog_entry_void();
803
804 buffer = la->la_bitmap; 792 buffer = la->la_bitmap;
805 for (i = 0; i < le16_to_cpu(la->la_size); i++) 793 for (i = 0; i < le16_to_cpu(la->la_size); i++)
806 count += hweight8(buffer[i]); 794 count += hweight8(buffer[i]);
807 795
808 mlog_exit(count); 796 trace_ocfs2_local_alloc_count_bits(count);
809 return count; 797 return count;
810} 798}
811 799
@@ -820,10 +808,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
820 void *bitmap = NULL; 808 void *bitmap = NULL;
821 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; 809 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
822 810
823 mlog_entry("(numbits wanted = %u)\n", *numbits);
824
825 if (!alloc->id1.bitmap1.i_total) { 811 if (!alloc->id1.bitmap1.i_total) {
826 mlog(0, "No bits in my window!\n");
827 bitoff = -1; 812 bitoff = -1;
828 goto bail; 813 goto bail;
829 } 814 }
@@ -883,8 +868,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
883 } 868 }
884 } 869 }
885 870
886 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 871 trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound);
887 numfound);
888 872
889 if (numfound == *numbits) 873 if (numfound == *numbits)
890 bitoff = startoff - numfound; 874 bitoff = startoff - numfound;
@@ -895,7 +879,10 @@ bail:
895 if (local_resv) 879 if (local_resv)
896 ocfs2_resv_discard(resmap, resv); 880 ocfs2_resv_discard(resmap, resv);
897 881
898 mlog_exit(bitoff); 882 trace_ocfs2_local_alloc_find_clear_bits(*numbits,
883 le32_to_cpu(alloc->id1.bitmap1.i_total),
884 bitoff, numfound);
885
899 return bitoff; 886 return bitoff;
900} 887}
901 888
@@ -903,15 +890,12 @@ static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
903{ 890{
904 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 891 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
905 int i; 892 int i;
906 mlog_entry_void();
907 893
908 alloc->id1.bitmap1.i_total = 0; 894 alloc->id1.bitmap1.i_total = 0;
909 alloc->id1.bitmap1.i_used = 0; 895 alloc->id1.bitmap1.i_used = 0;
910 la->la_bm_off = 0; 896 la->la_bm_off = 0;
911 for(i = 0; i < le16_to_cpu(la->la_size); i++) 897 for(i = 0; i < le16_to_cpu(la->la_size); i++)
912 la->la_bitmap[i] = 0; 898 la->la_bitmap[i] = 0;
913
914 mlog_exit_void();
915} 899}
916 900
917#if 0 901#if 0
@@ -952,18 +936,16 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
952 void *bitmap; 936 void *bitmap;
953 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 937 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
954 938
955 mlog_entry("total = %u, used = %u\n", 939 trace_ocfs2_sync_local_to_main(
956 le32_to_cpu(alloc->id1.bitmap1.i_total), 940 le32_to_cpu(alloc->id1.bitmap1.i_total),
957 le32_to_cpu(alloc->id1.bitmap1.i_used)); 941 le32_to_cpu(alloc->id1.bitmap1.i_used));
958 942
959 if (!alloc->id1.bitmap1.i_total) { 943 if (!alloc->id1.bitmap1.i_total) {
960 mlog(0, "nothing to sync!\n");
961 goto bail; 944 goto bail;
962 } 945 }
963 946
964 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 947 if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
965 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 948 le32_to_cpu(alloc->id1.bitmap1.i_total)) {
966 mlog(0, "all bits were taken!\n");
967 goto bail; 949 goto bail;
968 } 950 }
969 951
@@ -985,8 +967,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
985 ocfs2_clusters_to_blocks(osb->sb, 967 ocfs2_clusters_to_blocks(osb->sb,
986 start - count); 968 start - count);
987 969
988 mlog(0, "freeing %u bits starting at local alloc bit " 970 trace_ocfs2_sync_local_to_main_free(
989 "%u (la_start_blk = %llu, blkno = %llu)\n",
990 count, start - count, 971 count, start - count,
991 (unsigned long long)la_start_blk, 972 (unsigned long long)la_start_blk,
992 (unsigned long long)blkno); 973 (unsigned long long)blkno);
@@ -1007,7 +988,8 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
1007 } 988 }
1008 989
1009bail: 990bail:
1010 mlog_exit(status); 991 if (status)
992 mlog_errno(status);
1011 return status; 993 return status;
1012} 994}
1013 995
@@ -1132,7 +1114,8 @@ bail:
1132 *ac = NULL; 1114 *ac = NULL;
1133 } 1115 }
1134 1116
1135 mlog_exit(status); 1117 if (status)
1118 mlog_errno(status);
1136 return status; 1119 return status;
1137} 1120}
1138 1121
@@ -1148,17 +1131,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1148 struct ocfs2_dinode *alloc = NULL; 1131 struct ocfs2_dinode *alloc = NULL;
1149 struct ocfs2_local_alloc *la; 1132 struct ocfs2_local_alloc *la;
1150 1133
1151 mlog_entry_void();
1152
1153 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1134 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
1154 la = OCFS2_LOCAL_ALLOC(alloc); 1135 la = OCFS2_LOCAL_ALLOC(alloc);
1155 1136
1156 if (alloc->id1.bitmap1.i_total) 1137 trace_ocfs2_local_alloc_new_window(
1157 mlog(0, "asking me to alloc a new window over a non-empty " 1138 le32_to_cpu(alloc->id1.bitmap1.i_total),
1158 "one\n"); 1139 osb->local_alloc_bits);
1159
1160 mlog(0, "Allocating %u clusters for a new window.\n",
1161 osb->local_alloc_bits);
1162 1140
1163 /* Instruct the allocation code to try the most recently used 1141 /* Instruct the allocation code to try the most recently used
1164 * cluster group. We'll re-record the group used this pass 1142 * cluster group. We'll re-record the group used this pass
@@ -1220,13 +1198,13 @@ retry_enospc:
1220 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, 1198 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1221 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); 1199 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1222 1200
1223 mlog(0, "New window allocated:\n"); 1201 trace_ocfs2_local_alloc_new_window_result(
1224 mlog(0, "window la_bm_off = %u\n", 1202 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off,
1225 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1203 le32_to_cpu(alloc->id1.bitmap1.i_total));
1226 mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total));
1227 1204
1228bail: 1205bail:
1229 mlog_exit(status); 1206 if (status)
1207 mlog_errno(status);
1230 return status; 1208 return status;
1231} 1209}
1232 1210
@@ -1243,8 +1221,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1243 struct ocfs2_dinode *alloc_copy = NULL; 1221 struct ocfs2_dinode *alloc_copy = NULL;
1244 struct ocfs2_alloc_context *ac = NULL; 1222 struct ocfs2_alloc_context *ac = NULL;
1245 1223
1246 mlog_entry_void();
1247
1248 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1224 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
1249 1225
1250 /* This will lock the main bitmap for us. */ 1226 /* This will lock the main bitmap for us. */
@@ -1324,7 +1300,8 @@ bail:
1324 if (ac) 1300 if (ac)
1325 ocfs2_free_alloc_context(ac); 1301 ocfs2_free_alloc_context(ac);
1326 1302
1327 mlog_exit(status); 1303 if (status)
1304 mlog_errno(status);
1328 return status; 1305 return status;
1329} 1306}
1330 1307
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index b5cb3ede9408..e57c804069ea 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -26,7 +26,6 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28 28
29#define MLOG_MASK_PREFIX ML_INODE
30#include <cluster/masklog.h> 29#include <cluster/masklog.h>
31 30
32#include "ocfs2.h" 31#include "ocfs2.h"
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 7e32db9c2c99..3e9393ca39eb 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -31,7 +31,6 @@
31#include <linux/signal.h> 31#include <linux/signal.h>
32#include <linux/rbtree.h> 32#include <linux/rbtree.h>
33 33
34#define MLOG_MASK_PREFIX ML_FILE_IO
35#include <cluster/masklog.h> 34#include <cluster/masklog.h>
36 35
37#include "ocfs2.h" 36#include "ocfs2.h"
@@ -42,6 +41,7 @@
42#include "inode.h" 41#include "inode.h"
43#include "mmap.h" 42#include "mmap.h"
44#include "super.h" 43#include "super.h"
44#include "ocfs2_trace.h"
45 45
46 46
47static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) 47static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
@@ -49,13 +49,12 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
49 sigset_t oldset; 49 sigset_t oldset;
50 int ret; 50 int ret;
51 51
52 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
53
54 ocfs2_block_signals(&oldset); 52 ocfs2_block_signals(&oldset);
55 ret = filemap_fault(area, vmf); 53 ret = filemap_fault(area, vmf);
56 ocfs2_unblock_signals(&oldset); 54 ocfs2_unblock_signals(&oldset);
57 55
58 mlog_exit_ptr(vmf->page); 56 trace_ocfs2_fault(OCFS2_I(area->vm_file->f_mapping->host)->ip_blkno,
57 area, vmf->page, vmf->pgoff);
59 return ret; 58 return ret;
60} 59}
61 60
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d6c25d76b537..28f2cc1080d8 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -42,7 +42,6 @@
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44 44
45#define MLOG_MASK_PREFIX ML_NAMEI
46#include <cluster/masklog.h> 45#include <cluster/masklog.h>
47 46
48#include "ocfs2.h" 47#include "ocfs2.h"
@@ -63,6 +62,7 @@
63#include "uptodate.h" 62#include "uptodate.h"
64#include "xattr.h" 63#include "xattr.h"
65#include "acl.h" 64#include "acl.h"
65#include "ocfs2_trace.h"
66 66
67#include "buffer_head_io.h" 67#include "buffer_head_io.h"
68 68
@@ -106,17 +106,15 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
106 struct dentry *ret; 106 struct dentry *ret;
107 struct ocfs2_inode_info *oi; 107 struct ocfs2_inode_info *oi;
108 108
109 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, 109 trace_ocfs2_lookup(dir, dentry, dentry->d_name.len,
110 dentry->d_name.len, dentry->d_name.name); 110 dentry->d_name.name,
111 (unsigned long long)OCFS2_I(dir)->ip_blkno, 0);
111 112
112 if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) { 113 if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
113 ret = ERR_PTR(-ENAMETOOLONG); 114 ret = ERR_PTR(-ENAMETOOLONG);
114 goto bail; 115 goto bail;
115 } 116 }
116 117
117 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
118 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
119
120 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT); 118 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
121 if (status < 0) { 119 if (status < 0) {
122 if (status != -ENOENT) 120 if (status != -ENOENT)
@@ -182,7 +180,7 @@ bail_unlock:
182 180
183bail: 181bail:
184 182
185 mlog_exit_ptr(ret); 183 trace_ocfs2_lookup_ret(ret);
186 184
187 return ret; 185 return ret;
188} 186}
@@ -235,9 +233,9 @@ static int ocfs2_mknod(struct inode *dir,
235 sigset_t oldset; 233 sigset_t oldset;
236 int did_block_signals = 0; 234 int did_block_signals = 0;
237 235
238 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, 236 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
239 (unsigned long)dev, dentry->d_name.len, 237 (unsigned long long)OCFS2_I(dir)->ip_blkno,
240 dentry->d_name.name); 238 (unsigned long)dev, mode);
241 239
242 dquot_initialize(dir); 240 dquot_initialize(dir);
243 241
@@ -354,10 +352,6 @@ static int ocfs2_mknod(struct inode *dir,
354 goto leave; 352 goto leave;
355 did_quota_inode = 1; 353 did_quota_inode = 1;
356 354
357 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
358 inode->i_mode, (unsigned long)dev, dentry->d_name.len,
359 dentry->d_name.name);
360
361 /* do the real work now. */ 355 /* do the real work now. */
362 status = ocfs2_mknod_locked(osb, dir, inode, dev, 356 status = ocfs2_mknod_locked(osb, dir, inode, dev,
363 &new_fe_bh, parent_fe_bh, handle, 357 &new_fe_bh, parent_fe_bh, handle,
@@ -436,9 +430,6 @@ leave:
436 if (did_block_signals) 430 if (did_block_signals)
437 ocfs2_unblock_signals(&oldset); 431 ocfs2_unblock_signals(&oldset);
438 432
439 if (status == -ENOSPC)
440 mlog(0, "Disk is full\n");
441
442 brelse(new_fe_bh); 433 brelse(new_fe_bh);
443 brelse(parent_fe_bh); 434 brelse(parent_fe_bh);
444 kfree(si.name); 435 kfree(si.name);
@@ -466,7 +457,8 @@ leave:
466 iput(inode); 457 iput(inode);
467 } 458 }
468 459
469 mlog_exit(status); 460 if (status)
461 mlog_errno(status);
470 462
471 return status; 463 return status;
472} 464}
@@ -577,7 +569,8 @@ leave:
577 } 569 }
578 } 570 }
579 571
580 mlog_exit(status); 572 if (status)
573 mlog_errno(status);
581 return status; 574 return status;
582} 575}
583 576
@@ -615,10 +608,11 @@ static int ocfs2_mkdir(struct inode *dir,
615{ 608{
616 int ret; 609 int ret;
617 610
618 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, 611 trace_ocfs2_mkdir(dir, dentry, dentry->d_name.len, dentry->d_name.name,
619 dentry->d_name.len, dentry->d_name.name); 612 OCFS2_I(dir)->ip_blkno, mode);
620 ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0); 613 ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
621 mlog_exit(ret); 614 if (ret)
615 mlog_errno(ret);
622 616
623 return ret; 617 return ret;
624} 618}
@@ -630,10 +624,11 @@ static int ocfs2_create(struct inode *dir,
630{ 624{
631 int ret; 625 int ret;
632 626
633 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode, 627 trace_ocfs2_create(dir, dentry, dentry->d_name.len, dentry->d_name.name,
634 dentry->d_name.len, dentry->d_name.name); 628 (unsigned long long)OCFS2_I(dir)->ip_blkno, mode);
635 ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0); 629 ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
636 mlog_exit(ret); 630 if (ret)
631 mlog_errno(ret);
637 632
638 return ret; 633 return ret;
639} 634}
@@ -652,9 +647,9 @@ static int ocfs2_link(struct dentry *old_dentry,
652 struct ocfs2_dir_lookup_result lookup = { NULL, }; 647 struct ocfs2_dir_lookup_result lookup = { NULL, };
653 sigset_t oldset; 648 sigset_t oldset;
654 649
655 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, 650 trace_ocfs2_link((unsigned long long)OCFS2_I(inode)->ip_blkno,
656 old_dentry->d_name.len, old_dentry->d_name.name, 651 old_dentry->d_name.len, old_dentry->d_name.name,
657 dentry->d_name.len, dentry->d_name.name); 652 dentry->d_name.len, dentry->d_name.name);
658 653
659 if (S_ISDIR(inode->i_mode)) 654 if (S_ISDIR(inode->i_mode))
660 return -EPERM; 655 return -EPERM;
@@ -757,7 +752,8 @@ out:
757 752
758 ocfs2_free_dir_lookup_result(&lookup); 753 ocfs2_free_dir_lookup_result(&lookup);
759 754
760 mlog_exit(err); 755 if (err)
756 mlog_errno(err);
761 757
762 return err; 758 return err;
763} 759}
@@ -809,19 +805,17 @@ static int ocfs2_unlink(struct inode *dir,
809 struct ocfs2_dir_lookup_result lookup = { NULL, }; 805 struct ocfs2_dir_lookup_result lookup = { NULL, };
810 struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; 806 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
811 807
812 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, 808 trace_ocfs2_unlink(dir, dentry, dentry->d_name.len,
813 dentry->d_name.len, dentry->d_name.name); 809 dentry->d_name.name,
810 (unsigned long long)OCFS2_I(dir)->ip_blkno,
811 (unsigned long long)OCFS2_I(inode)->ip_blkno);
814 812
815 dquot_initialize(dir); 813 dquot_initialize(dir);
816 814
817 BUG_ON(dentry->d_parent->d_inode != dir); 815 BUG_ON(dentry->d_parent->d_inode != dir);
818 816
819 mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 817 if (inode == osb->root_inode)
820
821 if (inode == osb->root_inode) {
822 mlog(0, "Cannot delete the root directory\n");
823 return -EPERM; 818 return -EPERM;
824 }
825 819
826 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1, 820 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
827 OI_LS_PARENT); 821 OI_LS_PARENT);
@@ -843,9 +837,10 @@ static int ocfs2_unlink(struct inode *dir,
843 if (OCFS2_I(inode)->ip_blkno != blkno) { 837 if (OCFS2_I(inode)->ip_blkno != blkno) {
844 status = -ENOENT; 838 status = -ENOENT;
845 839
846 mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n", 840 trace_ocfs2_unlink_noent(
847 (unsigned long long)OCFS2_I(inode)->ip_blkno, 841 (unsigned long long)OCFS2_I(inode)->ip_blkno,
848 (unsigned long long)blkno, OCFS2_I(inode)->ip_flags); 842 (unsigned long long)blkno,
843 OCFS2_I(inode)->ip_flags);
849 goto leave; 844 goto leave;
850 } 845 }
851 846
@@ -954,7 +949,8 @@ leave:
954 ocfs2_free_dir_lookup_result(&orphan_insert); 949 ocfs2_free_dir_lookup_result(&orphan_insert);
955 ocfs2_free_dir_lookup_result(&lookup); 950 ocfs2_free_dir_lookup_result(&lookup);
956 951
957 mlog_exit(status); 952 if (status)
953 mlog_errno(status);
958 954
959 return status; 955 return status;
960} 956}
@@ -975,9 +971,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
975 struct buffer_head **tmpbh; 971 struct buffer_head **tmpbh;
976 struct inode *tmpinode; 972 struct inode *tmpinode;
977 973
978 mlog_entry("(inode1 = %llu, inode2 = %llu)\n", 974 trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
979 (unsigned long long)oi1->ip_blkno, 975 (unsigned long long)oi2->ip_blkno);
980 (unsigned long long)oi2->ip_blkno);
981 976
982 if (*bh1) 977 if (*bh1)
983 *bh1 = NULL; 978 *bh1 = NULL;
@@ -988,7 +983,6 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
988 if (oi1->ip_blkno != oi2->ip_blkno) { 983 if (oi1->ip_blkno != oi2->ip_blkno) {
989 if (oi1->ip_blkno < oi2->ip_blkno) { 984 if (oi1->ip_blkno < oi2->ip_blkno) {
990 /* switch id1 and id2 around */ 985 /* switch id1 and id2 around */
991 mlog(0, "switching them around...\n");
992 tmpbh = bh2; 986 tmpbh = bh2;
993 bh2 = bh1; 987 bh2 = bh1;
994 bh1 = tmpbh; 988 bh1 = tmpbh;
@@ -1024,8 +1018,13 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1024 mlog_errno(status); 1018 mlog_errno(status);
1025 } 1019 }
1026 1020
1021 trace_ocfs2_double_lock_end(
1022 (unsigned long long)OCFS2_I(inode1)->ip_blkno,
1023 (unsigned long long)OCFS2_I(inode2)->ip_blkno);
1024
1027bail: 1025bail:
1028 mlog_exit(status); 1026 if (status)
1027 mlog_errno(status);
1029 return status; 1028 return status;
1030} 1029}
1031 1030
@@ -1067,10 +1066,9 @@ static int ocfs2_rename(struct inode *old_dir,
1067 /* At some point it might be nice to break this function up a 1066 /* At some point it might be nice to break this function up a
1068 * bit. */ 1067 * bit. */
1069 1068
1070 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n", 1069 trace_ocfs2_rename(old_dir, old_dentry, new_dir, new_dentry,
1071 old_dir, old_dentry, new_dir, new_dentry, 1070 old_dentry->d_name.len, old_dentry->d_name.name,
1072 old_dentry->d_name.len, old_dentry->d_name.name, 1071 new_dentry->d_name.len, new_dentry->d_name.name);
1073 new_dentry->d_name.len, new_dentry->d_name.name);
1074 1072
1075 dquot_initialize(old_dir); 1073 dquot_initialize(old_dir);
1076 dquot_initialize(new_dir); 1074 dquot_initialize(new_dir);
@@ -1227,16 +1225,15 @@ static int ocfs2_rename(struct inode *old_dir,
1227 if (!new_inode) { 1225 if (!new_inode) {
1228 status = -EACCES; 1226 status = -EACCES;
1229 1227
1230 mlog(0, "We found an inode for name %.*s but VFS " 1228 trace_ocfs2_rename_target_exists(new_dentry->d_name.len,
1231 "didn't give us one.\n", new_dentry->d_name.len, 1229 new_dentry->d_name.name);
1232 new_dentry->d_name.name);
1233 goto bail; 1230 goto bail;
1234 } 1231 }
1235 1232
1236 if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) { 1233 if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1237 status = -EACCES; 1234 status = -EACCES;
1238 1235
1239 mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n", 1236 trace_ocfs2_rename_disagree(
1240 (unsigned long long)OCFS2_I(new_inode)->ip_blkno, 1237 (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
1241 (unsigned long long)newfe_blkno, 1238 (unsigned long long)newfe_blkno,
1242 OCFS2_I(new_inode)->ip_flags); 1239 OCFS2_I(new_inode)->ip_flags);
@@ -1259,8 +1256,7 @@ static int ocfs2_rename(struct inode *old_dir,
1259 1256
1260 newfe = (struct ocfs2_dinode *) newfe_bh->b_data; 1257 newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1261 1258
1262 mlog(0, "aha rename over existing... new_blkno=%llu " 1259 trace_ocfs2_rename_over_existing(
1263 "newfebh=%p bhblocknr=%llu\n",
1264 (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ? 1260 (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
1265 (unsigned long long)newfe_bh->b_blocknr : 0ULL); 1261 (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1266 1262
@@ -1476,7 +1472,8 @@ bail:
1476 brelse(old_dir_bh); 1472 brelse(old_dir_bh);
1477 brelse(new_dir_bh); 1473 brelse(new_dir_bh);
1478 1474
1479 mlog_exit(status); 1475 if (status)
1476 mlog_errno(status);
1480 1477
1481 return status; 1478 return status;
1482} 1479}
@@ -1501,9 +1498,8 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1501 * write i_size + 1 bytes. */ 1498 * write i_size + 1 bytes. */
1502 blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 1499 blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1503 1500
1504 mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n", 1501 trace_ocfs2_create_symlink_data((unsigned long long)inode->i_blocks,
1505 (unsigned long long)inode->i_blocks, 1502 i_size_read(inode), blocks);
1506 i_size_read(inode), blocks);
1507 1503
1508 /* Sanity check -- make sure we're going to fit. */ 1504 /* Sanity check -- make sure we're going to fit. */
1509 if (bytes_left > 1505 if (bytes_left >
@@ -1579,7 +1575,8 @@ bail:
1579 kfree(bhs); 1575 kfree(bhs);
1580 } 1576 }
1581 1577
1582 mlog_exit(status); 1578 if (status)
1579 mlog_errno(status);
1583 return status; 1580 return status;
1584} 1581}
1585 1582
@@ -1610,8 +1607,8 @@ static int ocfs2_symlink(struct inode *dir,
1610 sigset_t oldset; 1607 sigset_t oldset;
1611 int did_block_signals = 0; 1608 int did_block_signals = 0;
1612 1609
1613 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, 1610 trace_ocfs2_symlink_begin(dir, dentry, symname,
1614 dentry, symname, dentry->d_name.len, dentry->d_name.name); 1611 dentry->d_name.len, dentry->d_name.name);
1615 1612
1616 dquot_initialize(dir); 1613 dquot_initialize(dir);
1617 1614
@@ -1713,9 +1710,10 @@ static int ocfs2_symlink(struct inode *dir,
1713 goto bail; 1710 goto bail;
1714 did_quota_inode = 1; 1711 did_quota_inode = 1;
1715 1712
1716 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, 1713 trace_ocfs2_symlink_create(dir, dentry, dentry->d_name.len,
1717 inode->i_mode, dentry->d_name.len, 1714 dentry->d_name.name,
1718 dentry->d_name.name); 1715 (unsigned long long)OCFS2_I(dir)->ip_blkno,
1716 inode->i_mode);
1719 1717
1720 status = ocfs2_mknod_locked(osb, dir, inode, 1718 status = ocfs2_mknod_locked(osb, dir, inode,
1721 0, &new_fe_bh, parent_fe_bh, handle, 1719 0, &new_fe_bh, parent_fe_bh, handle,
@@ -1835,7 +1833,8 @@ bail:
1835 iput(inode); 1833 iput(inode);
1836 } 1834 }
1837 1835
1838 mlog_exit(status); 1836 if (status)
1837 mlog_errno(status);
1839 1838
1840 return status; 1839 return status;
1841} 1840}
@@ -1844,8 +1843,6 @@ static int ocfs2_blkno_stringify(u64 blkno, char *name)
1844{ 1843{
1845 int status, namelen; 1844 int status, namelen;
1846 1845
1847 mlog_entry_void();
1848
1849 namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx", 1846 namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
1850 (long long)blkno); 1847 (long long)blkno);
1851 if (namelen <= 0) { 1848 if (namelen <= 0) {
@@ -1862,12 +1859,12 @@ static int ocfs2_blkno_stringify(u64 blkno, char *name)
1862 goto bail; 1859 goto bail;
1863 } 1860 }
1864 1861
1865 mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name, 1862 trace_ocfs2_blkno_stringify(blkno, name, namelen);
1866 namelen);
1867 1863
1868 status = 0; 1864 status = 0;
1869bail: 1865bail:
1870 mlog_exit(status); 1866 if (status < 0)
1867 mlog_errno(status);
1871 return status; 1868 return status;
1872} 1869}
1873 1870
@@ -1980,7 +1977,8 @@ out:
1980 iput(orphan_dir_inode); 1977 iput(orphan_dir_inode);
1981 } 1978 }
1982 1979
1983 mlog_exit(ret); 1980 if (ret)
1981 mlog_errno(ret);
1984 return ret; 1982 return ret;
1985} 1983}
1986 1984
@@ -1997,7 +1995,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1997 struct ocfs2_dinode *orphan_fe; 1995 struct ocfs2_dinode *orphan_fe;
1998 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 1996 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
1999 1997
2000 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 1998 trace_ocfs2_orphan_add_begin(
1999 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2001 2000
2002 status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh); 2001 status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
2003 if (status < 0) { 2002 if (status < 0) {
@@ -2056,13 +2055,14 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2056 2055
2057 ocfs2_journal_dirty(handle, fe_bh); 2056 ocfs2_journal_dirty(handle, fe_bh);
2058 2057
2059 mlog(0, "Inode %llu orphaned in slot %d\n", 2058 trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno,
2060 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); 2059 osb->slot_num);
2061 2060
2062leave: 2061leave:
2063 brelse(orphan_dir_bh); 2062 brelse(orphan_dir_bh);
2064 2063
2065 mlog_exit(status); 2064 if (status)
2065 mlog_errno(status);
2066 return status; 2066 return status;
2067} 2067}
2068 2068
@@ -2078,17 +2078,15 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2078 int status = 0; 2078 int status = 0;
2079 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2079 struct ocfs2_dir_lookup_result lookup = { NULL, };
2080 2080
2081 mlog_entry_void();
2082
2083 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); 2081 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2084 if (status < 0) { 2082 if (status < 0) {
2085 mlog_errno(status); 2083 mlog_errno(status);
2086 goto leave; 2084 goto leave;
2087 } 2085 }
2088 2086
2089 mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n", 2087 trace_ocfs2_orphan_del(
2090 name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, 2088 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2091 OCFS2_ORPHAN_NAMELEN); 2089 name, OCFS2_ORPHAN_NAMELEN);
2092 2090
2093 /* find it's spot in the orphan directory */ 2091 /* find it's spot in the orphan directory */
2094 status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode, 2092 status = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN, orphan_dir_inode,
@@ -2124,7 +2122,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2124leave: 2122leave:
2125 ocfs2_free_dir_lookup_result(&lookup); 2123 ocfs2_free_dir_lookup_result(&lookup);
2126 2124
2127 mlog_exit(status); 2125 if (status)
2126 mlog_errno(status);
2128 return status; 2127 return status;
2129} 2128}
2130 2129
@@ -2321,9 +2320,6 @@ leave:
2321 iput(orphan_dir); 2320 iput(orphan_dir);
2322 } 2321 }
2323 2322
2324 if (status == -ENOSPC)
2325 mlog(0, "Disk is full\n");
2326
2327 if ((status < 0) && inode) { 2323 if ((status < 0) && inode) {
2328 clear_nlink(inode); 2324 clear_nlink(inode);
2329 iput(inode); 2325 iput(inode);
@@ -2358,8 +2354,10 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2358 struct buffer_head *di_bh = NULL; 2354 struct buffer_head *di_bh = NULL;
2359 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2355 struct ocfs2_dir_lookup_result lookup = { NULL, };
2360 2356
2361 mlog_entry("(0x%p, 0x%p, %.*s')\n", dir, dentry, 2357 trace_ocfs2_mv_orphaned_inode_to_new(dir, dentry,
2362 dentry->d_name.len, dentry->d_name.name); 2358 dentry->d_name.len, dentry->d_name.name,
2359 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2360 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2363 2361
2364 status = ocfs2_inode_lock(dir, &parent_di_bh, 1); 2362 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2365 if (status < 0) { 2363 if (status < 0) {
@@ -2476,7 +2474,8 @@ leave:
2476 2474
2477 ocfs2_free_dir_lookup_result(&lookup); 2475 ocfs2_free_dir_lookup_result(&lookup);
2478 2476
2479 mlog_exit(status); 2477 if (status)
2478 mlog_errno(status);
2480 2479
2481 return status; 2480 return status;
2482} 2481}
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 51cd6898e7f1..409285854f64 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -147,6 +147,17 @@ struct ocfs2_lock_res_ops;
147 147
148typedef void (*ocfs2_lock_callback)(int status, unsigned long data); 148typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
149 149
150#ifdef CONFIG_OCFS2_FS_STATS
151struct ocfs2_lock_stats {
152 u64 ls_total; /* Total wait in NSEC */
153 u32 ls_gets; /* Num acquires */
154 u32 ls_fail; /* Num failed acquires */
155
156 /* Storing max wait in usecs saves 24 bytes per inode */
157 u32 ls_max; /* Max wait in USEC */
158};
159#endif
160
150struct ocfs2_lock_res { 161struct ocfs2_lock_res {
151 void *l_priv; 162 void *l_priv;
152 struct ocfs2_lock_res_ops *l_ops; 163 struct ocfs2_lock_res_ops *l_ops;
@@ -182,15 +193,9 @@ struct ocfs2_lock_res {
182 struct list_head l_debug_list; 193 struct list_head l_debug_list;
183 194
184#ifdef CONFIG_OCFS2_FS_STATS 195#ifdef CONFIG_OCFS2_FS_STATS
185 unsigned long long l_lock_num_prmode; /* PR acquires */ 196 struct ocfs2_lock_stats l_lock_prmode; /* PR mode stats */
186 unsigned long long l_lock_num_exmode; /* EX acquires */ 197 u32 l_lock_refresh; /* Disk refreshes */
187 unsigned int l_lock_num_prmode_failed; /* Failed PR gets */ 198 struct ocfs2_lock_stats l_lock_exmode; /* EX mode stats */
188 unsigned int l_lock_num_exmode_failed; /* Failed EX gets */
189 unsigned long long l_lock_total_prmode; /* Tot wait for PR */
190 unsigned long long l_lock_total_exmode; /* Tot wait for EX */
191 unsigned int l_lock_max_prmode; /* Max wait for PR */
192 unsigned int l_lock_max_exmode; /* Max wait for EX */
193 unsigned int l_lock_refresh; /* Disk refreshes */
194#endif 199#endif
195#ifdef CONFIG_DEBUG_LOCK_ALLOC 200#ifdef CONFIG_DEBUG_LOCK_ALLOC
196 struct lockdep_map l_lockdep_map; 201 struct lockdep_map l_lockdep_map;
@@ -831,18 +836,18 @@ static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
831 836
832static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) 837static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
833{ 838{
834 ext2_set_bit(bit, bitmap); 839 __test_and_set_bit_le(bit, bitmap);
835} 840}
836#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) 841#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr))
837 842
838static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) 843static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap)
839{ 844{
840 ext2_clear_bit(bit, bitmap); 845 __test_and_clear_bit_le(bit, bitmap);
841} 846}
842#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) 847#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr))
843 848
844#define ocfs2_test_bit ext2_test_bit 849#define ocfs2_test_bit test_bit_le
845#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit 850#define ocfs2_find_next_zero_bit find_next_zero_bit_le
846#define ocfs2_find_next_bit ext2_find_next_bit 851#define ocfs2_find_next_bit find_next_bit_le
847#endif /* OCFS2_H */ 852#endif /* OCFS2_H */
848 853
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
new file mode 100644
index 000000000000..a1dae5bb54ac
--- /dev/null
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -0,0 +1,2739 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ocfs2
3
4#if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_OCFS2_H
6
7#include <linux/tracepoint.h>
8
9DECLARE_EVENT_CLASS(ocfs2__int,
10 TP_PROTO(int num),
11 TP_ARGS(num),
12 TP_STRUCT__entry(
13 __field(int, num)
14 ),
15 TP_fast_assign(
16 __entry->num = num;
17 ),
18 TP_printk("%d", __entry->num)
19);
20
21#define DEFINE_OCFS2_INT_EVENT(name) \
22DEFINE_EVENT(ocfs2__int, name, \
23 TP_PROTO(int num), \
24 TP_ARGS(num))
25
26DECLARE_EVENT_CLASS(ocfs2__uint,
27 TP_PROTO(unsigned int num),
28 TP_ARGS(num),
29 TP_STRUCT__entry(
30 __field( unsigned int, num )
31 ),
32 TP_fast_assign(
33 __entry->num = num;
34 ),
35 TP_printk("%u", __entry->num)
36);
37
38#define DEFINE_OCFS2_UINT_EVENT(name) \
39DEFINE_EVENT(ocfs2__uint, name, \
40 TP_PROTO(unsigned int num), \
41 TP_ARGS(num))
42
43DECLARE_EVENT_CLASS(ocfs2__ull,
44 TP_PROTO(unsigned long long blkno),
45 TP_ARGS(blkno),
46 TP_STRUCT__entry(
47 __field(unsigned long long, blkno)
48 ),
49 TP_fast_assign(
50 __entry->blkno = blkno;
51 ),
52 TP_printk("%llu", __entry->blkno)
53);
54
55#define DEFINE_OCFS2_ULL_EVENT(name) \
56DEFINE_EVENT(ocfs2__ull, name, \
57 TP_PROTO(unsigned long long num), \
58 TP_ARGS(num))
59
60DECLARE_EVENT_CLASS(ocfs2__pointer,
61 TP_PROTO(void *pointer),
62 TP_ARGS(pointer),
63 TP_STRUCT__entry(
64 __field(void *, pointer)
65 ),
66 TP_fast_assign(
67 __entry->pointer = pointer;
68 ),
69 TP_printk("%p", __entry->pointer)
70);
71
72#define DEFINE_OCFS2_POINTER_EVENT(name) \
73DEFINE_EVENT(ocfs2__pointer, name, \
74 TP_PROTO(void *pointer), \
75 TP_ARGS(pointer))
76
77DECLARE_EVENT_CLASS(ocfs2__string,
78 TP_PROTO(const char *name),
79 TP_ARGS(name),
80 TP_STRUCT__entry(
81 __string(name,name)
82 ),
83 TP_fast_assign(
84 __assign_str(name, name);
85 ),
86 TP_printk("%s", __get_str(name))
87);
88
89#define DEFINE_OCFS2_STRING_EVENT(name) \
90DEFINE_EVENT(ocfs2__string, name, \
91 TP_PROTO(const char *name), \
92 TP_ARGS(name))
93
94DECLARE_EVENT_CLASS(ocfs2__int_int,
95 TP_PROTO(int value1, int value2),
96 TP_ARGS(value1, value2),
97 TP_STRUCT__entry(
98 __field(int, value1)
99 __field(int, value2)
100 ),
101 TP_fast_assign(
102 __entry->value1 = value1;
103 __entry->value2 = value2;
104 ),
105 TP_printk("%d %d", __entry->value1, __entry->value2)
106);
107
108#define DEFINE_OCFS2_INT_INT_EVENT(name) \
109DEFINE_EVENT(ocfs2__int_int, name, \
110 TP_PROTO(int val1, int val2), \
111 TP_ARGS(val1, val2))
112
113DECLARE_EVENT_CLASS(ocfs2__uint_int,
114 TP_PROTO(unsigned int value1, int value2),
115 TP_ARGS(value1, value2),
116 TP_STRUCT__entry(
117 __field(unsigned int, value1)
118 __field(int, value2)
119 ),
120 TP_fast_assign(
121 __entry->value1 = value1;
122 __entry->value2 = value2;
123 ),
124 TP_printk("%u %d", __entry->value1, __entry->value2)
125);
126
127#define DEFINE_OCFS2_UINT_INT_EVENT(name) \
128DEFINE_EVENT(ocfs2__uint_int, name, \
129 TP_PROTO(unsigned int val1, int val2), \
130 TP_ARGS(val1, val2))
131
132DECLARE_EVENT_CLASS(ocfs2__uint_uint,
133 TP_PROTO(unsigned int value1, unsigned int value2),
134 TP_ARGS(value1, value2),
135 TP_STRUCT__entry(
136 __field(unsigned int, value1)
137 __field(unsigned int, value2)
138 ),
139 TP_fast_assign(
140 __entry->value1 = value1;
141 __entry->value2 = value2;
142 ),
143 TP_printk("%u %u", __entry->value1, __entry->value2)
144);
145
146#define DEFINE_OCFS2_UINT_UINT_EVENT(name) \
147DEFINE_EVENT(ocfs2__uint_uint, name, \
148 TP_PROTO(unsigned int val1, unsigned int val2), \
149 TP_ARGS(val1, val2))
150
151DECLARE_EVENT_CLASS(ocfs2__ull_uint,
152 TP_PROTO(unsigned long long value1, unsigned int value2),
153 TP_ARGS(value1, value2),
154 TP_STRUCT__entry(
155 __field(unsigned long long, value1)
156 __field(unsigned int, value2)
157 ),
158 TP_fast_assign(
159 __entry->value1 = value1;
160 __entry->value2 = value2;
161 ),
162 TP_printk("%llu %u", __entry->value1, __entry->value2)
163);
164
165#define DEFINE_OCFS2_ULL_UINT_EVENT(name) \
166DEFINE_EVENT(ocfs2__ull_uint, name, \
167 TP_PROTO(unsigned long long val1, unsigned int val2), \
168 TP_ARGS(val1, val2))
169
170DECLARE_EVENT_CLASS(ocfs2__ull_int,
171 TP_PROTO(unsigned long long value1, int value2),
172 TP_ARGS(value1, value2),
173 TP_STRUCT__entry(
174 __field(unsigned long long, value1)
175 __field(int, value2)
176 ),
177 TP_fast_assign(
178 __entry->value1 = value1;
179 __entry->value2 = value2;
180 ),
181 TP_printk("%llu %d", __entry->value1, __entry->value2)
182);
183
184#define DEFINE_OCFS2_ULL_INT_EVENT(name) \
185DEFINE_EVENT(ocfs2__ull_int, name, \
186 TP_PROTO(unsigned long long val1, int val2), \
187 TP_ARGS(val1, val2))
188
189DECLARE_EVENT_CLASS(ocfs2__ull_ull,
190 TP_PROTO(unsigned long long value1, unsigned long long value2),
191 TP_ARGS(value1, value2),
192 TP_STRUCT__entry(
193 __field(unsigned long long, value1)
194 __field(unsigned long long, value2)
195 ),
196 TP_fast_assign(
197 __entry->value1 = value1;
198 __entry->value2 = value2;
199 ),
200 TP_printk("%llu %llu", __entry->value1, __entry->value2)
201);
202
203#define DEFINE_OCFS2_ULL_ULL_EVENT(name) \
204DEFINE_EVENT(ocfs2__ull_ull, name, \
205 TP_PROTO(unsigned long long val1, unsigned long long val2), \
206 TP_ARGS(val1, val2))
207
208DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint,
209 TP_PROTO(unsigned long long value1,
210 unsigned long long value2, unsigned int value3),
211 TP_ARGS(value1, value2, value3),
212 TP_STRUCT__entry(
213 __field(unsigned long long, value1)
214 __field(unsigned long long, value2)
215 __field(unsigned int, value3)
216 ),
217 TP_fast_assign(
218 __entry->value1 = value1;
219 __entry->value2 = value2;
220 __entry->value3 = value3;
221 ),
222 TP_printk("%llu %llu %u",
223 __entry->value1, __entry->value2, __entry->value3)
224);
225
226#define DEFINE_OCFS2_ULL_ULL_UINT_EVENT(name) \
227DEFINE_EVENT(ocfs2__ull_ull_uint, name, \
228 TP_PROTO(unsigned long long val1, \
229 unsigned long long val2, unsigned int val3), \
230 TP_ARGS(val1, val2, val3))
231
232DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint,
233 TP_PROTO(unsigned long long value1,
234 unsigned int value2, unsigned int value3),
235 TP_ARGS(value1, value2, value3),
236 TP_STRUCT__entry(
237 __field(unsigned long long, value1)
238 __field(unsigned int, value2)
239 __field(unsigned int, value3)
240 ),
241 TP_fast_assign(
242 __entry->value1 = value1;
243 __entry->value2 = value2;
244 __entry->value3 = value3;
245 ),
246 TP_printk("%llu %u %u", __entry->value1,
247 __entry->value2, __entry->value3)
248);
249
250#define DEFINE_OCFS2_ULL_UINT_UINT_EVENT(name) \
251DEFINE_EVENT(ocfs2__ull_uint_uint, name, \
252 TP_PROTO(unsigned long long val1, \
253 unsigned int val2, unsigned int val3), \
254 TP_ARGS(val1, val2, val3))
255
256DECLARE_EVENT_CLASS(ocfs2__uint_uint_uint,
257 TP_PROTO(unsigned int value1, unsigned int value2,
258 unsigned int value3),
259 TP_ARGS(value1, value2, value3),
260 TP_STRUCT__entry(
261 __field( unsigned int, value1 )
262 __field( unsigned int, value2 )
263 __field( unsigned int, value3 )
264 ),
265 TP_fast_assign(
266 __entry->value1 = value1;
267 __entry->value2 = value2;
268 __entry->value3 = value3;
269 ),
270 TP_printk("%u %u %u", __entry->value1, __entry->value2, __entry->value3)
271);
272
273#define DEFINE_OCFS2_UINT_UINT_UINT_EVENT(name) \
274DEFINE_EVENT(ocfs2__uint_uint_uint, name, \
275 TP_PROTO(unsigned int value1, unsigned int value2, \
276 unsigned int value3), \
277 TP_ARGS(value1, value2, value3))
278
279DECLARE_EVENT_CLASS(ocfs2__ull_ull_ull,
280 TP_PROTO(unsigned long long value1,
281 unsigned long long value2, unsigned long long value3),
282 TP_ARGS(value1, value2, value3),
283 TP_STRUCT__entry(
284 __field(unsigned long long, value1)
285 __field(unsigned long long, value2)
286 __field(unsigned long long, value3)
287 ),
288 TP_fast_assign(
289 __entry->value1 = value1;
290 __entry->value2 = value2;
291 __entry->value3 = value3;
292 ),
293 TP_printk("%llu %llu %llu",
294 __entry->value1, __entry->value2, __entry->value3)
295);
296
297#define DEFINE_OCFS2_ULL_ULL_ULL_EVENT(name) \
298DEFINE_EVENT(ocfs2__ull_ull_ull, name, \
299 TP_PROTO(unsigned long long value1, unsigned long long value2, \
300 unsigned long long value3), \
301 TP_ARGS(value1, value2, value3))
302
303DECLARE_EVENT_CLASS(ocfs2__ull_int_int_int,
304 TP_PROTO(unsigned long long ull, int value1, int value2, int value3),
305 TP_ARGS(ull, value1, value2, value3),
306 TP_STRUCT__entry(
307 __field( unsigned long long, ull )
308 __field( int, value1 )
309 __field( int, value2 )
310 __field( int, value3 )
311 ),
312 TP_fast_assign(
313 __entry->ull = ull;
314 __entry->value1 = value1;
315 __entry->value2 = value2;
316 __entry->value3 = value3;
317 ),
318 TP_printk("%llu %d %d %d",
319 __entry->ull, __entry->value1,
320 __entry->value2, __entry->value3)
321);
322
323#define DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(name) \
324DEFINE_EVENT(ocfs2__ull_int_int_int, name, \
325 TP_PROTO(unsigned long long ull, int value1, \
326 int value2, int value3), \
327 TP_ARGS(ull, value1, value2, value3))
328
329DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint_uint,
330 TP_PROTO(unsigned long long ull, unsigned int value1,
331 unsigned int value2, unsigned int value3),
332 TP_ARGS(ull, value1, value2, value3),
333 TP_STRUCT__entry(
334 __field(unsigned long long, ull)
335 __field(unsigned int, value1)
336 __field(unsigned int, value2)
337 __field(unsigned int, value3)
338 ),
339 TP_fast_assign(
340 __entry->ull = ull;
341 __entry->value1 = value1;
342 __entry->value2 = value2;
343 __entry->value3 = value3;
344 ),
345 TP_printk("%llu %u %u %u",
346 __entry->ull, __entry->value1,
347 __entry->value2, __entry->value3)
348);
349
350#define DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(name) \
351DEFINE_EVENT(ocfs2__ull_uint_uint_uint, name, \
352 TP_PROTO(unsigned long long ull, unsigned int value1, \
353 unsigned int value2, unsigned int value3), \
354 TP_ARGS(ull, value1, value2, value3))
355
356DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint_uint,
357 TP_PROTO(unsigned long long value1, unsigned long long value2,
358 unsigned int value3, unsigned int value4),
359 TP_ARGS(value1, value2, value3, value4),
360 TP_STRUCT__entry(
361 __field(unsigned long long, value1)
362 __field(unsigned long long, value2)
363 __field(unsigned int, value3)
364 __field(unsigned int, value4)
365 ),
366 TP_fast_assign(
367 __entry->value1 = value1;
368 __entry->value2 = value2;
369 __entry->value3 = value3;
370 __entry->value4 = value4;
371 ),
372 TP_printk("%llu %llu %u %u",
373 __entry->value1, __entry->value2,
374 __entry->value3, __entry->value4)
375);
376
377#define DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(name) \
378DEFINE_EVENT(ocfs2__ull_ull_uint_uint, name, \
379 TP_PROTO(unsigned long long ull, unsigned long long ull1, \
380 unsigned int value2, unsigned int value3), \
381 TP_ARGS(ull, ull1, value2, value3))
382
383/* Trace events for fs/ocfs2/alloc.c. */
384DECLARE_EVENT_CLASS(ocfs2__btree_ops,
385 TP_PROTO(unsigned long long owner,\
386 unsigned int value1, unsigned int value2),
387 TP_ARGS(owner, value1, value2),
388 TP_STRUCT__entry(
389 __field(unsigned long long, owner)
390 __field(unsigned int, value1)
391 __field(unsigned int, value2)
392 ),
393 TP_fast_assign(
394 __entry->owner = owner;
395 __entry->value1 = value1;
396 __entry->value2 = value2;
397 ),
398 TP_printk("%llu %u %u",
399 __entry->owner, __entry->value1, __entry->value2)
400);
401
402#define DEFINE_OCFS2_BTREE_EVENT(name) \
403DEFINE_EVENT(ocfs2__btree_ops, name, \
404 TP_PROTO(unsigned long long owner, \
405 unsigned int value1, unsigned int value2), \
406 TP_ARGS(owner, value1, value2))
407
408DEFINE_OCFS2_BTREE_EVENT(ocfs2_adjust_rightmost_branch);
409
410DEFINE_OCFS2_BTREE_EVENT(ocfs2_rotate_tree_right);
411
412DEFINE_OCFS2_BTREE_EVENT(ocfs2_append_rec_to_path);
413
414DEFINE_OCFS2_BTREE_EVENT(ocfs2_insert_extent_start);
415
416DEFINE_OCFS2_BTREE_EVENT(ocfs2_add_clusters_in_btree);
417
418DEFINE_OCFS2_INT_EVENT(ocfs2_num_free_extents);
419
420DEFINE_OCFS2_INT_EVENT(ocfs2_complete_edge_insert);
421
422TRACE_EVENT(ocfs2_grow_tree,
423 TP_PROTO(unsigned long long owner, int depth),
424 TP_ARGS(owner, depth),
425 TP_STRUCT__entry(
426 __field(unsigned long long, owner)
427 __field(int, depth)
428 ),
429 TP_fast_assign(
430 __entry->owner = owner;
431 __entry->depth = depth;
432 ),
433 TP_printk("%llu %d", __entry->owner, __entry->depth)
434);
435
436TRACE_EVENT(ocfs2_rotate_subtree,
437 TP_PROTO(int subtree_root, unsigned long long blkno,
438 int depth),
439 TP_ARGS(subtree_root, blkno, depth),
440 TP_STRUCT__entry(
441 __field(int, subtree_root)
442 __field(unsigned long long, blkno)
443 __field(int, depth)
444 ),
445 TP_fast_assign(
446 __entry->subtree_root = subtree_root;
447 __entry->blkno = blkno;
448 __entry->depth = depth;
449 ),
450 TP_printk("%d %llu %d", __entry->subtree_root,
451 __entry->blkno, __entry->depth)
452);
453
454TRACE_EVENT(ocfs2_insert_extent,
455 TP_PROTO(unsigned int ins_appending, unsigned int ins_contig,
456 int ins_contig_index, int free_records, int ins_tree_depth),
457 TP_ARGS(ins_appending, ins_contig, ins_contig_index, free_records,
458 ins_tree_depth),
459 TP_STRUCT__entry(
460 __field(unsigned int, ins_appending)
461 __field(unsigned int, ins_contig)
462 __field(int, ins_contig_index)
463 __field(int, free_records)
464 __field(int, ins_tree_depth)
465 ),
466 TP_fast_assign(
467 __entry->ins_appending = ins_appending;
468 __entry->ins_contig = ins_contig;
469 __entry->ins_contig_index = ins_contig_index;
470 __entry->free_records = free_records;
471 __entry->ins_tree_depth = ins_tree_depth;
472 ),
473 TP_printk("%u %u %d %d %d",
474 __entry->ins_appending, __entry->ins_contig,
475 __entry->ins_contig_index, __entry->free_records,
476 __entry->ins_tree_depth)
477);
478
479TRACE_EVENT(ocfs2_split_extent,
480 TP_PROTO(int split_index, unsigned int c_contig_type,
481 unsigned int c_has_empty_extent,
482 unsigned int c_split_covers_rec),
483 TP_ARGS(split_index, c_contig_type,
484 c_has_empty_extent, c_split_covers_rec),
485 TP_STRUCT__entry(
486 __field(int, split_index)
487 __field(unsigned int, c_contig_type)
488 __field(unsigned int, c_has_empty_extent)
489 __field(unsigned int, c_split_covers_rec)
490 ),
491 TP_fast_assign(
492 __entry->split_index = split_index;
493 __entry->c_contig_type = c_contig_type;
494 __entry->c_has_empty_extent = c_has_empty_extent;
495 __entry->c_split_covers_rec = c_split_covers_rec;
496 ),
497 TP_printk("%d %u %u %u", __entry->split_index, __entry->c_contig_type,
498 __entry->c_has_empty_extent, __entry->c_split_covers_rec)
499);
500
501TRACE_EVENT(ocfs2_remove_extent,
502 TP_PROTO(unsigned long long owner, unsigned int cpos,
503 unsigned int len, int index,
504 unsigned int e_cpos, unsigned int clusters),
505 TP_ARGS(owner, cpos, len, index, e_cpos, clusters),
506 TP_STRUCT__entry(
507 __field(unsigned long long, owner)
508 __field(unsigned int, cpos)
509 __field(unsigned int, len)
510 __field(int, index)
511 __field(unsigned int, e_cpos)
512 __field(unsigned int, clusters)
513 ),
514 TP_fast_assign(
515 __entry->owner = owner;
516 __entry->cpos = cpos;
517 __entry->len = len;
518 __entry->index = index;
519 __entry->e_cpos = e_cpos;
520 __entry->clusters = clusters;
521 ),
522 TP_printk("%llu %u %u %d %u %u",
523 __entry->owner, __entry->cpos, __entry->len, __entry->index,
524 __entry->e_cpos, __entry->clusters)
525);
526
527TRACE_EVENT(ocfs2_commit_truncate,
528 TP_PROTO(unsigned long long ino, unsigned int new_cpos,
529 unsigned int clusters, unsigned int depth),
530 TP_ARGS(ino, new_cpos, clusters, depth),
531 TP_STRUCT__entry(
532 __field(unsigned long long, ino)
533 __field(unsigned int, new_cpos)
534 __field(unsigned int, clusters)
535 __field(unsigned int, depth)
536 ),
537 TP_fast_assign(
538 __entry->ino = ino;
539 __entry->new_cpos = new_cpos;
540 __entry->clusters = clusters;
541 __entry->depth = depth;
542 ),
543 TP_printk("%llu %u %u %u",
544 __entry->ino, __entry->new_cpos,
545 __entry->clusters, __entry->depth)
546);
547
548TRACE_EVENT(ocfs2_validate_extent_block,
549 TP_PROTO(unsigned long long blkno),
550 TP_ARGS(blkno),
551 TP_STRUCT__entry(
552 __field(unsigned long long, blkno)
553 ),
554 TP_fast_assign(
555 __entry->blkno = blkno;
556 ),
557 TP_printk("%llu ", __entry->blkno)
558);
559
560TRACE_EVENT(ocfs2_rotate_leaf,
561 TP_PROTO(unsigned int insert_cpos, int insert_index,
562 int has_empty, int next_free,
563 unsigned int l_count),
564 TP_ARGS(insert_cpos, insert_index, has_empty,
565 next_free, l_count),
566 TP_STRUCT__entry(
567 __field(unsigned int, insert_cpos)
568 __field(int, insert_index)
569 __field(int, has_empty)
570 __field(int, next_free)
571 __field(unsigned int, l_count)
572 ),
573 TP_fast_assign(
574 __entry->insert_cpos = insert_cpos;
575 __entry->insert_index = insert_index;
576 __entry->has_empty = has_empty;
577 __entry->next_free = next_free;
578 __entry->l_count = l_count;
579 ),
580 TP_printk("%u %d %d %d %u", __entry->insert_cpos,
581 __entry->insert_index, __entry->has_empty,
582 __entry->next_free, __entry->l_count)
583);
584
585TRACE_EVENT(ocfs2_add_clusters_in_btree_ret,
586 TP_PROTO(int status, int reason, int err),
587 TP_ARGS(status, reason, err),
588 TP_STRUCT__entry(
589 __field(int, status)
590 __field(int, reason)
591 __field(int, err)
592 ),
593 TP_fast_assign(
594 __entry->status = status;
595 __entry->reason = reason;
596 __entry->err = err;
597 ),
598 TP_printk("%d %d %d", __entry->status,
599 __entry->reason, __entry->err)
600);
601
602TRACE_EVENT(ocfs2_mark_extent_written,
603 TP_PROTO(unsigned long long owner, unsigned int cpos,
604 unsigned int len, unsigned int phys),
605 TP_ARGS(owner, cpos, len, phys),
606 TP_STRUCT__entry(
607 __field(unsigned long long, owner)
608 __field(unsigned int, cpos)
609 __field(unsigned int, len)
610 __field(unsigned int, phys)
611 ),
612 TP_fast_assign(
613 __entry->owner = owner;
614 __entry->cpos = cpos;
615 __entry->len = len;
616 __entry->phys = phys;
617 ),
618 TP_printk("%llu %u %u %u",
619 __entry->owner, __entry->cpos,
620 __entry->len, __entry->phys)
621);
622
623DECLARE_EVENT_CLASS(ocfs2__truncate_log_ops,
624 TP_PROTO(unsigned long long blkno, int index,
625 unsigned int start, unsigned int num),
626 TP_ARGS(blkno, index, start, num),
627 TP_STRUCT__entry(
628 __field(unsigned long long, blkno)
629 __field(int, index)
630 __field(unsigned int, start)
631 __field(unsigned int, num)
632 ),
633 TP_fast_assign(
634 __entry->blkno = blkno;
635 __entry->index = index;
636 __entry->start = start;
637 __entry->num = num;
638 ),
639 TP_printk("%llu %d %u %u",
640 __entry->blkno, __entry->index,
641 __entry->start, __entry->num)
642);
643
644#define DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(name) \
645DEFINE_EVENT(ocfs2__truncate_log_ops, name, \
646 TP_PROTO(unsigned long long blkno, int index, \
647 unsigned int start, unsigned int num), \
648 TP_ARGS(blkno, index, start, num))
649
650DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_truncate_log_append);
651
652DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_replay_truncate_records);
653
654DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_flush_truncate_log);
655
656DEFINE_OCFS2_INT_EVENT(ocfs2_begin_truncate_log_recovery);
657
658DEFINE_OCFS2_INT_EVENT(ocfs2_truncate_log_recovery_num);
659
660DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_complete_truncate_log_recovery);
661
662DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_free_cached_blocks);
663
664DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_cache_cluster_dealloc);
665
666DEFINE_OCFS2_INT_INT_EVENT(ocfs2_run_deallocs);
667
668TRACE_EVENT(ocfs2_cache_block_dealloc,
669 TP_PROTO(int type, int slot, unsigned long long suballoc,
670 unsigned long long blkno, unsigned int bit),
671 TP_ARGS(type, slot, suballoc, blkno, bit),
672 TP_STRUCT__entry(
673 __field(int, type)
674 __field(int, slot)
675 __field(unsigned long long, suballoc)
676 __field(unsigned long long, blkno)
677 __field(unsigned int, bit)
678 ),
679 TP_fast_assign(
680 __entry->type = type;
681 __entry->slot = slot;
682 __entry->suballoc = suballoc;
683 __entry->blkno = blkno;
684 __entry->bit = bit;
685 ),
686 TP_printk("%d %d %llu %llu %u",
687 __entry->type, __entry->slot, __entry->suballoc,
688 __entry->blkno, __entry->bit)
689);
690
691/* End of trace events for fs/ocfs2/alloc.c. */
692
693/* Trace events for fs/ocfs2/localalloc.c. */
694
695DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_la_set_sizes);
696
697DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_alloc_should_use_local);
698
699DEFINE_OCFS2_INT_EVENT(ocfs2_load_local_alloc);
700
701DEFINE_OCFS2_INT_EVENT(ocfs2_begin_local_alloc_recovery);
702
703DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_reserve_local_alloc_bits);
704
705DEFINE_OCFS2_UINT_EVENT(ocfs2_local_alloc_count_bits);
706
707DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits_search_bitmap);
708
709DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits);
710
711DEFINE_OCFS2_INT_INT_EVENT(ocfs2_sync_local_to_main);
712
713TRACE_EVENT(ocfs2_sync_local_to_main_free,
714 TP_PROTO(int count, int bit, unsigned long long start_blk,
715 unsigned long long blkno),
716 TP_ARGS(count, bit, start_blk, blkno),
717 TP_STRUCT__entry(
718 __field(int, count)
719 __field(int, bit)
720 __field(unsigned long long, start_blk)
721 __field(unsigned long long, blkno)
722 ),
723 TP_fast_assign(
724 __entry->count = count;
725 __entry->bit = bit;
726 __entry->start_blk = start_blk;
727 __entry->blkno = blkno;
728 ),
729 TP_printk("%d %d %llu %llu",
730 __entry->count, __entry->bit, __entry->start_blk,
731 __entry->blkno)
732);
733
734DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_new_window);
735
736DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_local_alloc_new_window_result);
737
738/* End of trace events for fs/ocfs2/localalloc.c. */
739
740/* Trace events for fs/ocfs2/resize.c. */
741
742DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_update_last_group_and_inode);
743
744DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_group_extend);
745
746DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_group_add);
747
748/* End of trace events for fs/ocfs2/resize.c. */
749
750/* Trace events for fs/ocfs2/suballoc.c. */
751
752DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_group_descriptor);
753
754DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_contig);
755
756DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_discontig);
757
758DEFINE_OCFS2_ULL_EVENT(ocfs2_block_group_alloc);
759
760DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_nospc);
761
762DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_no_new_group);
763
764DEFINE_OCFS2_ULL_EVENT(ocfs2_reserve_new_inode_new_group);
765
766DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_set_bits);
767
768TRACE_EVENT(ocfs2_relink_block_group,
769 TP_PROTO(unsigned long long i_blkno, unsigned int chain,
770 unsigned long long bg_blkno,
771 unsigned long long prev_blkno),
772 TP_ARGS(i_blkno, chain, bg_blkno, prev_blkno),
773 TP_STRUCT__entry(
774 __field(unsigned long long, i_blkno)
775 __field(unsigned int, chain)
776 __field(unsigned long long, bg_blkno)
777 __field(unsigned long long, prev_blkno)
778 ),
779 TP_fast_assign(
780 __entry->i_blkno = i_blkno;
781 __entry->chain = chain;
782 __entry->bg_blkno = bg_blkno;
783 __entry->prev_blkno = prev_blkno;
784 ),
785 TP_printk("%llu %u %llu %llu",
786 __entry->i_blkno, __entry->chain, __entry->bg_blkno,
787 __entry->prev_blkno)
788);
789
790DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_cluster_group_search_wrong_max_bits);
791
792DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cluster_group_search_max_block);
793
794DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_block_group_search_max_block);
795
796DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_search_chain_begin);
797
798DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_succ);
799
800DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_end);
801
802DEFINE_OCFS2_UINT_EVENT(ocfs2_claim_suballoc_bits);
803
804DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_claim_new_inode_at_loc);
805
806DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_clear_bits);
807
808TRACE_EVENT(ocfs2_free_suballoc_bits,
809 TP_PROTO(unsigned long long inode, unsigned long long group,
810 unsigned int start_bit, unsigned int count),
811 TP_ARGS(inode, group, start_bit, count),
812 TP_STRUCT__entry(
813 __field(unsigned long long, inode)
814 __field(unsigned long long, group)
815 __field(unsigned int, start_bit)
816 __field(unsigned int, count)
817 ),
818 TP_fast_assign(
819 __entry->inode = inode;
820 __entry->group = group;
821 __entry->start_bit = start_bit;
822 __entry->count = count;
823 ),
824 TP_printk("%llu %llu %u %u", __entry->inode, __entry->group,
825 __entry->start_bit, __entry->count)
826);
827
828TRACE_EVENT(ocfs2_free_clusters,
829 TP_PROTO(unsigned long long bg_blkno, unsigned long long start_blk,
830 unsigned int start_bit, unsigned int count),
831 TP_ARGS(bg_blkno, start_blk, start_bit, count),
832 TP_STRUCT__entry(
833 __field(unsigned long long, bg_blkno)
834 __field(unsigned long long, start_blk)
835 __field(unsigned int, start_bit)
836 __field(unsigned int, count)
837 ),
838 TP_fast_assign(
839 __entry->bg_blkno = bg_blkno;
840 __entry->start_blk = start_blk;
841 __entry->start_bit = start_bit;
842 __entry->count = count;
843 ),
844 TP_printk("%llu %llu %u %u", __entry->bg_blkno, __entry->start_blk,
845 __entry->start_bit, __entry->count)
846);
847
848DEFINE_OCFS2_ULL_EVENT(ocfs2_get_suballoc_slot_bit);
849
850DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_test_suballoc_bit);
851
852DEFINE_OCFS2_ULL_EVENT(ocfs2_test_inode_bit);
853
854/* End of trace events for fs/ocfs2/suballoc.c. */
855
856/* Trace events for fs/ocfs2/refcounttree.c. */
857
858DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_refcount_block);
859
860DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_refcount_trees);
861
862DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree);
863
864DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree_blkno);
865
866DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_change_refcount_rec);
867
868DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_expand_inline_ref_root);
869
870DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_divide_leaf_refcount_block);
871
872DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_new_leaf_refcount_block);
873
874DECLARE_EVENT_CLASS(ocfs2__refcount_tree_ops,
875 TP_PROTO(unsigned long long blkno, int index,
876 unsigned long long cpos,
877 unsigned int clusters, unsigned int refcount),
878 TP_ARGS(blkno, index, cpos, clusters, refcount),
879 TP_STRUCT__entry(
880 __field(unsigned long long, blkno)
881 __field(int, index)
882 __field(unsigned long long, cpos)
883 __field(unsigned int, clusters)
884 __field(unsigned int, refcount)
885 ),
886 TP_fast_assign(
887 __entry->blkno = blkno;
888 __entry->index = index;
889 __entry->cpos = cpos;
890 __entry->clusters = clusters;
891 __entry->refcount = refcount;
892 ),
893 TP_printk("%llu %d %llu %u %u", __entry->blkno, __entry->index,
894 __entry->cpos, __entry->clusters, __entry->refcount)
895);
896
897#define DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(name) \
898DEFINE_EVENT(ocfs2__refcount_tree_ops, name, \
899 TP_PROTO(unsigned long long blkno, int index, \
900 unsigned long long cpos, \
901 unsigned int count, unsigned int refcount), \
902 TP_ARGS(blkno, index, cpos, count, refcount))
903
904DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_insert_refcount_rec);
905
906TRACE_EVENT(ocfs2_split_refcount_rec,
907 TP_PROTO(unsigned long long cpos,
908 unsigned int clusters, unsigned int refcount,
909 unsigned long long split_cpos,
910 unsigned int split_clusters, unsigned int split_refcount),
911 TP_ARGS(cpos, clusters, refcount,
912 split_cpos, split_clusters, split_refcount),
913 TP_STRUCT__entry(
914 __field(unsigned long long, cpos)
915 __field(unsigned int, clusters)
916 __field(unsigned int, refcount)
917 __field(unsigned long long, split_cpos)
918 __field(unsigned int, split_clusters)
919 __field(unsigned int, split_refcount)
920 ),
921 TP_fast_assign(
922 __entry->cpos = cpos;
923 __entry->clusters = clusters;
924 __entry->refcount = refcount;
925 __entry->split_cpos = split_cpos;
926 __entry->split_clusters = split_clusters;
927 __entry->split_refcount = split_refcount;
928 ),
929 TP_printk("%llu %u %u %llu %u %u",
930 __entry->cpos, __entry->clusters, __entry->refcount,
931 __entry->split_cpos, __entry->split_clusters,
932 __entry->split_refcount)
933);
934
935DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_split_refcount_rec_insert);
936
937DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_increase_refcount_begin);
938
939DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_change);
940
941DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_increase_refcount_insert);
942
943DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_split);
944
945DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_remove_refcount_extent);
946
947DEFINE_OCFS2_ULL_EVENT(ocfs2_restore_refcount_block);
948
949DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_decrease_refcount_rec);
950
951TRACE_EVENT(ocfs2_decrease_refcount,
952 TP_PROTO(unsigned long long owner,
953 unsigned long long cpos,
954 unsigned int len, int delete),
955 TP_ARGS(owner, cpos, len, delete),
956 TP_STRUCT__entry(
957 __field(unsigned long long, owner)
958 __field(unsigned long long, cpos)
959 __field(unsigned int, len)
960 __field(int, delete)
961 ),
962 TP_fast_assign(
963 __entry->owner = owner;
964 __entry->cpos = cpos;
965 __entry->len = len;
966 __entry->delete = delete;
967 ),
968 TP_printk("%llu %llu %u %d",
969 __entry->owner, __entry->cpos, __entry->len, __entry->delete)
970);
971
972DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_mark_extent_refcounted);
973
974DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_calc_refcount_meta_credits);
975
976TRACE_EVENT(ocfs2_calc_refcount_meta_credits_iterate,
977 TP_PROTO(int recs_add, unsigned long long cpos,
978 unsigned int clusters, unsigned long long r_cpos,
979 unsigned int r_clusters, unsigned int refcount, int index),
980 TP_ARGS(recs_add, cpos, clusters, r_cpos, r_clusters, refcount, index),
981 TP_STRUCT__entry(
982 __field(int, recs_add)
983 __field(unsigned long long, cpos)
984 __field(unsigned int, clusters)
985 __field(unsigned long long, r_cpos)
986 __field(unsigned int, r_clusters)
987 __field(unsigned int, refcount)
988 __field(int, index)
989 ),
990 TP_fast_assign(
991 __entry->recs_add = recs_add;
992 __entry->cpos = cpos;
993 __entry->clusters = clusters;
994 __entry->r_cpos = r_cpos;
995 __entry->r_clusters = r_clusters;
996 __entry->refcount = refcount;
997 __entry->index = index;
998 ),
999 TP_printk("%d %llu %u %llu %u %u %d",
1000 __entry->recs_add, __entry->cpos, __entry->clusters,
1001 __entry->r_cpos, __entry->r_clusters,
1002 __entry->refcount, __entry->index)
1003);
1004
1005DEFINE_OCFS2_INT_INT_EVENT(ocfs2_add_refcount_flag);
1006
1007DEFINE_OCFS2_INT_INT_EVENT(ocfs2_prepare_refcount_change_for_del);
1008
1009DEFINE_OCFS2_INT_INT_EVENT(ocfs2_lock_refcount_allocators);
1010
1011DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_page);
1012
1013DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_jbd);
1014
1015TRACE_EVENT(ocfs2_clear_ext_refcount,
1016 TP_PROTO(unsigned long long ino, unsigned int cpos,
1017 unsigned int len, unsigned int p_cluster,
1018 unsigned int ext_flags),
1019 TP_ARGS(ino, cpos, len, p_cluster, ext_flags),
1020 TP_STRUCT__entry(
1021 __field(unsigned long long, ino)
1022 __field(unsigned int, cpos)
1023 __field(unsigned int, len)
1024 __field(unsigned int, p_cluster)
1025 __field(unsigned int, ext_flags)
1026 ),
1027 TP_fast_assign(
1028 __entry->ino = ino;
1029 __entry->cpos = cpos;
1030 __entry->len = len;
1031 __entry->p_cluster = p_cluster;
1032 __entry->ext_flags = ext_flags;
1033 ),
1034 TP_printk("%llu %u %u %u %u",
1035 __entry->ino, __entry->cpos, __entry->len,
1036 __entry->p_cluster, __entry->ext_flags)
1037);
1038
1039TRACE_EVENT(ocfs2_replace_clusters,
1040 TP_PROTO(unsigned long long ino, unsigned int cpos,
1041 unsigned int old, unsigned int new, unsigned int len,
1042 unsigned int ext_flags),
1043 TP_ARGS(ino, cpos, old, new, len, ext_flags),
1044 TP_STRUCT__entry(
1045 __field(unsigned long long, ino)
1046 __field(unsigned int, cpos)
1047 __field(unsigned int, old)
1048 __field(unsigned int, new)
1049 __field(unsigned int, len)
1050 __field(unsigned int, ext_flags)
1051 ),
1052 TP_fast_assign(
1053 __entry->ino = ino;
1054 __entry->cpos = cpos;
1055 __entry->old = old;
1056 __entry->new = new;
1057 __entry->len = len;
1058 __entry->ext_flags = ext_flags;
1059 ),
1060 TP_printk("%llu %u %u %u %u %u",
1061 __entry->ino, __entry->cpos, __entry->old, __entry->new,
1062 __entry->len, __entry->ext_flags)
1063);
1064
1065DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_make_clusters_writable);
1066
1067TRACE_EVENT(ocfs2_refcount_cow_hunk,
1068 TP_PROTO(unsigned long long ino, unsigned int cpos,
1069 unsigned int write_len, unsigned int max_cpos,
1070 unsigned int cow_start, unsigned int cow_len),
1071 TP_ARGS(ino, cpos, write_len, max_cpos, cow_start, cow_len),
1072 TP_STRUCT__entry(
1073 __field(unsigned long long, ino)
1074 __field(unsigned int, cpos)
1075 __field(unsigned int, write_len)
1076 __field(unsigned int, max_cpos)
1077 __field(unsigned int, cow_start)
1078 __field(unsigned int, cow_len)
1079 ),
1080 TP_fast_assign(
1081 __entry->ino = ino;
1082 __entry->cpos = cpos;
1083 __entry->write_len = write_len;
1084 __entry->max_cpos = max_cpos;
1085 __entry->cow_start = cow_start;
1086 __entry->cow_len = cow_len;
1087 ),
1088 TP_printk("%llu %u %u %u %u %u",
1089 __entry->ino, __entry->cpos, __entry->write_len,
1090 __entry->max_cpos, __entry->cow_start, __entry->cow_len)
1091);
1092
1093/* End of trace events for fs/ocfs2/refcounttree.c. */
1094
1095/* Trace events for fs/ocfs2/aops.c. */
1096
1097DECLARE_EVENT_CLASS(ocfs2__get_block,
1098 TP_PROTO(unsigned long long ino, unsigned long long iblock,
1099 void *bh_result, int create),
1100 TP_ARGS(ino, iblock, bh_result, create),
1101 TP_STRUCT__entry(
1102 __field(unsigned long long, ino)
1103 __field(unsigned long long, iblock)
1104 __field(void *, bh_result)
1105 __field(int, create)
1106 ),
1107 TP_fast_assign(
1108 __entry->ino = ino;
1109 __entry->iblock = iblock;
1110 __entry->bh_result = bh_result;
1111 __entry->create = create;
1112 ),
1113 TP_printk("%llu %llu %p %d",
1114 __entry->ino, __entry->iblock,
1115 __entry->bh_result, __entry->create)
1116);
1117
1118#define DEFINE_OCFS2_GET_BLOCK_EVENT(name) \
1119DEFINE_EVENT(ocfs2__get_block, name, \
1120 TP_PROTO(unsigned long long ino, unsigned long long iblock, \
1121 void *bh_result, int create), \
1122 TP_ARGS(ino, iblock, bh_result, create))
1123
1124DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_symlink_get_block);
1125
1126DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_get_block);
1127
1128DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_get_block_end);
1129
1130DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_readpage);
1131
1132DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_writepage);
1133
1134DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_bmap);
1135
1136TRACE_EVENT(ocfs2_try_to_write_inline_data,
1137 TP_PROTO(unsigned long long ino, unsigned int len,
1138 unsigned long long pos, unsigned int flags),
1139 TP_ARGS(ino, len, pos, flags),
1140 TP_STRUCT__entry(
1141 __field(unsigned long long, ino)
1142 __field(unsigned int, len)
1143 __field(unsigned long long, pos)
1144 __field(unsigned int, flags)
1145 ),
1146 TP_fast_assign(
1147 __entry->ino = ino;
1148 __entry->len = len;
1149 __entry->pos = pos;
1150 __entry->flags = flags;
1151 ),
1152 TP_printk("%llu %u %llu 0x%x",
1153 __entry->ino, __entry->len, __entry->pos, __entry->flags)
1154);
1155
1156TRACE_EVENT(ocfs2_write_begin_nolock,
1157 TP_PROTO(unsigned long long ino,
1158 long long i_size, unsigned int i_clusters,
1159 unsigned long long pos, unsigned int len,
1160 unsigned int flags, void *page,
1161 unsigned int clusters, unsigned int extents_to_split),
1162 TP_ARGS(ino, i_size, i_clusters, pos, len, flags,
1163 page, clusters, extents_to_split),
1164 TP_STRUCT__entry(
1165 __field(unsigned long long, ino)
1166 __field(long long, i_size)
1167 __field(unsigned int, i_clusters)
1168 __field(unsigned long long, pos)
1169 __field(unsigned int, len)
1170 __field(unsigned int, flags)
1171 __field(void *, page)
1172 __field(unsigned int, clusters)
1173 __field(unsigned int, extents_to_split)
1174 ),
1175 TP_fast_assign(
1176 __entry->ino = ino;
1177 __entry->i_size = i_size;
1178 __entry->i_clusters = i_clusters;
1179 __entry->pos = pos;
1180 __entry->len = len;
1181 __entry->flags = flags;
1182 __entry->page = page;
1183 __entry->clusters = clusters;
1184 __entry->extents_to_split = extents_to_split;
1185 ),
1186 TP_printk("%llu %lld %u %llu %u %u %p %u %u",
1187 __entry->ino, __entry->i_size, __entry->i_clusters,
1188 __entry->pos, __entry->len,
1189 __entry->flags, __entry->page, __entry->clusters,
1190 __entry->extents_to_split)
1191);
1192
1193TRACE_EVENT(ocfs2_write_end_inline,
1194 TP_PROTO(unsigned long long ino,
1195 unsigned long long pos, unsigned int copied,
1196 unsigned int id_count, unsigned int features),
1197 TP_ARGS(ino, pos, copied, id_count, features),
1198 TP_STRUCT__entry(
1199 __field(unsigned long long, ino)
1200 __field(unsigned long long, pos)
1201 __field(unsigned int, copied)
1202 __field(unsigned int, id_count)
1203 __field(unsigned int, features)
1204 ),
1205 TP_fast_assign(
1206 __entry->ino = ino;
1207 __entry->pos = pos;
1208 __entry->copied = copied;
1209 __entry->id_count = id_count;
1210 __entry->features = features;
1211 ),
1212 TP_printk("%llu %llu %u %u %u",
1213 __entry->ino, __entry->pos, __entry->copied,
1214 __entry->id_count, __entry->features)
1215);
1216
1217/* End of trace events for fs/ocfs2/aops.c. */
1218
1219/* Trace events for fs/ocfs2/mmap.c. */
1220
1221TRACE_EVENT(ocfs2_fault,
1222 TP_PROTO(unsigned long long ino,
1223 void *area, void *page, unsigned long pgoff),
1224 TP_ARGS(ino, area, page, pgoff),
1225 TP_STRUCT__entry(
1226 __field(unsigned long long, ino)
1227 __field(void *, area)
1228 __field(void *, page)
1229 __field(unsigned long, pgoff)
1230 ),
1231 TP_fast_assign(
1232 __entry->ino = ino;
1233 __entry->area = area;
1234 __entry->page = page;
1235 __entry->pgoff = pgoff;
1236 ),
1237 TP_printk("%llu %p %p %lu",
1238 __entry->ino, __entry->area, __entry->page, __entry->pgoff)
1239);
1240
1241/* End of trace events for fs/ocfs2/mmap.c. */
1242
1243/* Trace events for fs/ocfs2/file.c. */
1244
1245DECLARE_EVENT_CLASS(ocfs2__file_ops,
1246 TP_PROTO(void *inode, void *file, void *dentry,
1247 unsigned long long ino,
1248 unsigned int d_len, const unsigned char *d_name,
1249 unsigned long long para),
1250 TP_ARGS(inode, file, dentry, ino, d_len, d_name, para),
1251 TP_STRUCT__entry(
1252 __field(void *, inode)
1253 __field(void *, file)
1254 __field(void *, dentry)
1255 __field(unsigned long long, ino)
1256 __field(unsigned int, d_len)
1257 __string(d_name, d_name)
1258 __field(unsigned long long, para)
1259 ),
1260 TP_fast_assign(
1261 __entry->inode = inode;
1262 __entry->file = file;
1263 __entry->dentry = dentry;
1264 __entry->ino = ino;
1265 __entry->d_len = d_len;
1266 __assign_str(d_name, d_name);
1267 __entry->para = para;
1268 ),
1269 TP_printk("%p %p %p %llu %llu %.*s", __entry->inode, __entry->file,
1270 __entry->dentry, __entry->ino, __entry->para,
1271 __entry->d_len, __get_str(d_name))
1272);
1273
1274#define DEFINE_OCFS2_FILE_OPS(name) \
1275DEFINE_EVENT(ocfs2__file_ops, name, \
1276TP_PROTO(void *inode, void *file, void *dentry, \
1277 unsigned long long ino, \
1278 unsigned int d_len, const unsigned char *d_name, \
1279 unsigned long long mode), \
1280 TP_ARGS(inode, file, dentry, ino, d_len, d_name, mode))
1281
1282DEFINE_OCFS2_FILE_OPS(ocfs2_file_open);
1283
1284DEFINE_OCFS2_FILE_OPS(ocfs2_file_release);
1285
1286DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file);
1287
1288DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
1289
1290DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
1291
1292DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
1293
1294DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
1295
1296DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
1297
1298DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_truncate_file_error);
1299
1300TRACE_EVENT(ocfs2_extend_allocation,
1301 TP_PROTO(unsigned long long ip_blkno, unsigned long long size,
1302 unsigned int clusters, unsigned int clusters_to_add,
1303 int why, int restart_func),
1304 TP_ARGS(ip_blkno, size, clusters, clusters_to_add, why, restart_func),
1305 TP_STRUCT__entry(
1306 __field(unsigned long long, ip_blkno)
1307 __field(unsigned long long, size)
1308 __field(unsigned int, clusters)
1309 __field(unsigned int, clusters_to_add)
1310 __field(int, why)
1311 __field(int, restart_func)
1312 ),
1313 TP_fast_assign(
1314 __entry->ip_blkno = ip_blkno;
1315 __entry->size = size;
1316 __entry->clusters = clusters;
1317 __entry->clusters_to_add = clusters_to_add;
1318 __entry->why = why;
1319 __entry->restart_func = restart_func;
1320 ),
1321 TP_printk("%llu %llu %u %u %d %d",
1322 __entry->ip_blkno, __entry->size, __entry->clusters,
1323 __entry->clusters_to_add, __entry->why, __entry->restart_func)
1324);
1325
1326TRACE_EVENT(ocfs2_extend_allocation_end,
1327 TP_PROTO(unsigned long long ino,
1328 unsigned int di_clusters, unsigned long long di_size,
1329 unsigned int ip_clusters, unsigned long long i_size),
1330 TP_ARGS(ino, di_clusters, di_size, ip_clusters, i_size),
1331 TP_STRUCT__entry(
1332 __field(unsigned long long, ino)
1333 __field(unsigned int, di_clusters)
1334 __field(unsigned long long, di_size)
1335 __field(unsigned int, ip_clusters)
1336 __field(unsigned long long, i_size)
1337 ),
1338 TP_fast_assign(
1339 __entry->ino = ino;
1340 __entry->di_clusters = di_clusters;
1341 __entry->di_size = di_size;
1342 __entry->ip_clusters = ip_clusters;
1343 __entry->i_size = i_size;
1344 ),
1345 TP_printk("%llu %u %llu %u %llu", __entry->ino, __entry->di_clusters,
1346 __entry->di_size, __entry->ip_clusters, __entry->i_size)
1347);
1348
1349TRACE_EVENT(ocfs2_write_zero_page,
1350 TP_PROTO(unsigned long long ino,
1351 unsigned long long abs_from, unsigned long long abs_to,
1352 unsigned long index, unsigned int zero_from,
1353 unsigned int zero_to),
1354 TP_ARGS(ino, abs_from, abs_to, index, zero_from, zero_to),
1355 TP_STRUCT__entry(
1356 __field(unsigned long long, ino)
1357 __field(unsigned long long, abs_from)
1358 __field(unsigned long long, abs_to)
1359 __field(unsigned long, index)
1360 __field(unsigned int, zero_from)
1361 __field(unsigned int, zero_to)
1362 ),
1363 TP_fast_assign(
1364 __entry->ino = ino;
1365 __entry->abs_from = abs_from;
1366 __entry->abs_to = abs_to;
1367 __entry->index = index;
1368 __entry->zero_from = zero_from;
1369 __entry->zero_to = zero_to;
1370 ),
1371 TP_printk("%llu %llu %llu %lu %u %u", __entry->ino,
1372 __entry->abs_from, __entry->abs_to,
1373 __entry->index, __entry->zero_from, __entry->zero_to)
1374);
1375
1376DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend_range);
1377
1378DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend);
1379
1380TRACE_EVENT(ocfs2_setattr,
1381 TP_PROTO(void *inode, void *dentry,
1382 unsigned long long ino,
1383 unsigned int d_len, const unsigned char *d_name,
1384 unsigned int ia_valid, unsigned int ia_mode,
1385 unsigned int ia_uid, unsigned int ia_gid),
1386 TP_ARGS(inode, dentry, ino, d_len, d_name,
1387 ia_valid, ia_mode, ia_uid, ia_gid),
1388 TP_STRUCT__entry(
1389 __field(void *, inode)
1390 __field(void *, dentry)
1391 __field(unsigned long long, ino)
1392 __field(unsigned int, d_len)
1393 __string(d_name, d_name)
1394 __field(unsigned int, ia_valid)
1395 __field(unsigned int, ia_mode)
1396 __field(unsigned int, ia_uid)
1397 __field(unsigned int, ia_gid)
1398 ),
1399 TP_fast_assign(
1400 __entry->inode = inode;
1401 __entry->dentry = dentry;
1402 __entry->ino = ino;
1403 __entry->d_len = d_len;
1404 __assign_str(d_name, d_name);
1405 __entry->ia_valid = ia_valid;
1406 __entry->ia_mode = ia_mode;
1407 __entry->ia_uid = ia_uid;
1408 __entry->ia_gid = ia_gid;
1409 ),
1410 TP_printk("%p %p %llu %.*s %u %u %u %u", __entry->inode,
1411 __entry->dentry, __entry->ino, __entry->d_len,
1412 __get_str(d_name), __entry->ia_valid, __entry->ia_mode,
1413 __entry->ia_uid, __entry->ia_gid)
1414);
1415
1416DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_write_remove_suid);
1417
1418DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_partial_clusters);
1419
1420DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range1);
1421
1422DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range2);
1423
1424DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
1425
1426TRACE_EVENT(ocfs2_prepare_inode_for_write,
1427 TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
1428 int appending, unsigned long count,
1429 int *direct_io, int *has_refcount),
1430 TP_ARGS(ino, saved_pos, appending, count, direct_io, has_refcount),
1431 TP_STRUCT__entry(
1432 __field(unsigned long long, ino)
1433 __field(unsigned long long, saved_pos)
1434 __field(int, appending)
1435 __field(unsigned long, count)
1436 __field(int, direct_io)
1437 __field(int, has_refcount)
1438 ),
1439 TP_fast_assign(
1440 __entry->ino = ino;
1441 __entry->saved_pos = saved_pos;
1442 __entry->appending = appending;
1443 __entry->count = count;
1444 __entry->direct_io = direct_io ? *direct_io : -1;
1445 __entry->has_refcount = has_refcount ? *has_refcount : -1;
1446 ),
1447 TP_printk("%llu %llu %d %lu %d %d", __entry->ino,
1448 __entry->saved_pos, __entry->appending, __entry->count,
1449 __entry->direct_io, __entry->has_refcount)
1450);
1451
1452DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
1453
1454/* End of trace events for fs/ocfs2/file.c. */
1455
1456/* Trace events for fs/ocfs2/inode.c. */
1457
1458TRACE_EVENT(ocfs2_iget_begin,
1459 TP_PROTO(unsigned long long ino, unsigned int flags, int sysfile_type),
1460 TP_ARGS(ino, flags, sysfile_type),
1461 TP_STRUCT__entry(
1462 __field(unsigned long long, ino)
1463 __field(unsigned int, flags)
1464 __field(int, sysfile_type)
1465 ),
1466 TP_fast_assign(
1467 __entry->ino = ino;
1468 __entry->flags = flags;
1469 __entry->sysfile_type = sysfile_type;
1470 ),
1471 TP_printk("%llu %u %d", __entry->ino,
1472 __entry->flags, __entry->sysfile_type)
1473);
1474
1475DEFINE_OCFS2_ULL_EVENT(ocfs2_iget5_locked);
1476
1477TRACE_EVENT(ocfs2_iget_end,
1478 TP_PROTO(void *inode, unsigned long long ino),
1479 TP_ARGS(inode, ino),
1480 TP_STRUCT__entry(
1481 __field(void *, inode)
1482 __field(unsigned long long, ino)
1483 ),
1484 TP_fast_assign(
1485 __entry->inode = inode;
1486 __entry->ino = ino;
1487 ),
1488 TP_printk("%p %llu", __entry->inode, __entry->ino)
1489);
1490
1491TRACE_EVENT(ocfs2_find_actor,
1492 TP_PROTO(void *inode, unsigned long long ino,
1493 void *args, unsigned long long fi_blkno),
1494 TP_ARGS(inode, ino, args, fi_blkno),
1495 TP_STRUCT__entry(
1496 __field(void *, inode)
1497 __field(unsigned long long, ino)
1498 __field(void *, args)
1499 __field(unsigned long long, fi_blkno)
1500 ),
1501 TP_fast_assign(
1502 __entry->inode = inode;
1503 __entry->ino = ino;
1504 __entry->args = args;
1505 __entry->fi_blkno = fi_blkno;
1506 ),
1507 TP_printk("%p %llu %p %llu", __entry->inode, __entry->ino,
1508 __entry->args, __entry->fi_blkno)
1509);
1510
1511DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_populate_inode);
1512
1513DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_read_locked_inode);
1514
1515DEFINE_OCFS2_INT_INT_EVENT(ocfs2_check_orphan_recovery_state);
1516
1517DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_inode_block);
1518
1519TRACE_EVENT(ocfs2_inode_is_valid_to_delete,
1520 TP_PROTO(void *task, void *dc_task, unsigned long long ino,
1521 unsigned int flags),
1522 TP_ARGS(task, dc_task, ino, flags),
1523 TP_STRUCT__entry(
1524 __field(void *, task)
1525 __field(void *, dc_task)
1526 __field(unsigned long long, ino)
1527 __field(unsigned int, flags)
1528 ),
1529 TP_fast_assign(
1530 __entry->task = task;
1531 __entry->dc_task = dc_task;
1532 __entry->ino = ino;
1533 __entry->flags = flags;
1534 ),
1535 TP_printk("%p %p %llu %u", __entry->task, __entry->dc_task,
1536 __entry->ino, __entry->flags)
1537);
1538
1539DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_query_inode_wipe_begin);
1540
1541DEFINE_OCFS2_UINT_EVENT(ocfs2_query_inode_wipe_succ);
1542
1543DEFINE_OCFS2_INT_INT_EVENT(ocfs2_query_inode_wipe_end);
1544
1545DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_cleanup_delete_inode);
1546
1547DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_delete_inode);
1548
1549DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_clear_inode);
1550
1551DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_drop_inode);
1552
1553TRACE_EVENT(ocfs2_inode_revalidate,
1554 TP_PROTO(void *inode, unsigned long long ino,
1555 unsigned int flags),
1556 TP_ARGS(inode, ino, flags),
1557 TP_STRUCT__entry(
1558 __field(void *, inode)
1559 __field(unsigned long long, ino)
1560 __field(unsigned int, flags)
1561 ),
1562 TP_fast_assign(
1563 __entry->inode = inode;
1564 __entry->ino = ino;
1565 __entry->flags = flags;
1566 ),
1567 TP_printk("%p %llu %u", __entry->inode, __entry->ino, __entry->flags)
1568);
1569
1570DEFINE_OCFS2_ULL_EVENT(ocfs2_mark_inode_dirty);
1571
1572/* End of trace events for fs/ocfs2/inode.c. */
1573
1574/* Trace events for fs/ocfs2/extent_map.c. */
1575
1576TRACE_EVENT(ocfs2_read_virt_blocks,
1577 TP_PROTO(void *inode, unsigned long long vblock, int nr,
1578 void *bhs, unsigned int flags, void *validate),
1579 TP_ARGS(inode, vblock, nr, bhs, flags, validate),
1580 TP_STRUCT__entry(
1581 __field(void *, inode)
1582 __field(unsigned long long, vblock)
1583 __field(int, nr)
1584 __field(void *, bhs)
1585 __field(unsigned int, flags)
1586 __field(void *, validate)
1587 ),
1588 TP_fast_assign(
1589 __entry->inode = inode;
1590 __entry->vblock = vblock;
1591 __entry->nr = nr;
1592 __entry->bhs = bhs;
1593 __entry->flags = flags;
1594 __entry->validate = validate;
1595 ),
1596 TP_printk("%p %llu %d %p %x %p", __entry->inode, __entry->vblock,
1597 __entry->nr, __entry->bhs, __entry->flags, __entry->validate)
1598);
1599
1600/* End of trace events for fs/ocfs2/extent_map.c. */
1601
1602/* Trace events for fs/ocfs2/slot_map.c. */
1603
1604DEFINE_OCFS2_UINT_EVENT(ocfs2_refresh_slot_info);
1605
1606DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers);
1607
1608DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers_block);
1609
1610DEFINE_OCFS2_INT_EVENT(ocfs2_find_slot);
1611
1612/* End of trace events for fs/ocfs2/slot_map.c. */
1613
1614/* Trace events for fs/ocfs2/heartbeat.c. */
1615
1616DEFINE_OCFS2_INT_EVENT(ocfs2_do_node_down);
1617
1618/* End of trace events for fs/ocfs2/heartbeat.c. */
1619
1620/* Trace events for fs/ocfs2/super.c. */
1621
1622TRACE_EVENT(ocfs2_remount,
1623 TP_PROTO(unsigned long s_flags, unsigned long osb_flags, int flags),
1624 TP_ARGS(s_flags, osb_flags, flags),
1625 TP_STRUCT__entry(
1626 __field(unsigned long, s_flags)
1627 __field(unsigned long, osb_flags)
1628 __field(int, flags)
1629 ),
1630 TP_fast_assign(
1631 __entry->s_flags = s_flags;
1632 __entry->osb_flags = osb_flags;
1633 __entry->flags = flags;
1634 ),
1635 TP_printk("%lu %lu %d", __entry->s_flags,
1636 __entry->osb_flags, __entry->flags)
1637);
1638
1639TRACE_EVENT(ocfs2_fill_super,
1640 TP_PROTO(void *sb, void *data, int silent),
1641 TP_ARGS(sb, data, silent),
1642 TP_STRUCT__entry(
1643 __field(void *, sb)
1644 __field(void *, data)
1645 __field(int, silent)
1646 ),
1647 TP_fast_assign(
1648 __entry->sb = sb;
1649 __entry->data = data;
1650 __entry->silent = silent;
1651 ),
1652 TP_printk("%p %p %d", __entry->sb,
1653 __entry->data, __entry->silent)
1654);
1655
1656TRACE_EVENT(ocfs2_parse_options,
1657 TP_PROTO(int is_remount, char *options),
1658 TP_ARGS(is_remount, options),
1659 TP_STRUCT__entry(
1660 __field(int, is_remount)
1661 __string(options, options)
1662 ),
1663 TP_fast_assign(
1664 __entry->is_remount = is_remount;
1665 __assign_str(options, options);
1666 ),
1667 TP_printk("%d %s", __entry->is_remount, __get_str(options))
1668);
1669
1670DEFINE_OCFS2_POINTER_EVENT(ocfs2_put_super);
1671
1672TRACE_EVENT(ocfs2_statfs,
1673 TP_PROTO(void *sb, void *buf),
1674 TP_ARGS(sb, buf),
1675 TP_STRUCT__entry(
1676 __field(void *, sb)
1677 __field(void *, buf)
1678 ),
1679 TP_fast_assign(
1680 __entry->sb = sb;
1681 __entry->buf = buf;
1682 ),
1683 TP_printk("%p %p", __entry->sb, __entry->buf)
1684);
1685
1686DEFINE_OCFS2_POINTER_EVENT(ocfs2_dismount_volume);
1687
1688TRACE_EVENT(ocfs2_initialize_super,
1689 TP_PROTO(char *label, char *uuid_str, unsigned long long root_dir,
1690 unsigned long long system_dir, int cluster_bits),
1691 TP_ARGS(label, uuid_str, root_dir, system_dir, cluster_bits),
1692 TP_STRUCT__entry(
1693 __string(label, label)
1694 __string(uuid_str, uuid_str)
1695 __field(unsigned long long, root_dir)
1696 __field(unsigned long long, system_dir)
1697 __field(int, cluster_bits)
1698 ),
1699 TP_fast_assign(
1700 __assign_str(label, label);
1701 __assign_str(uuid_str, uuid_str);
1702 __entry->root_dir = root_dir;
1703 __entry->system_dir = system_dir;
1704 __entry->cluster_bits = cluster_bits;
1705 ),
1706 TP_printk("%s %s %llu %llu %d", __get_str(label), __get_str(uuid_str),
1707 __entry->root_dir, __entry->system_dir, __entry->cluster_bits)
1708);
1709
1710/* End of trace events for fs/ocfs2/super.c. */
1711
1712/* Trace events for fs/ocfs2/xattr.c. */
1713
1714DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_xattr_block);
1715
1716DEFINE_OCFS2_UINT_EVENT(ocfs2_xattr_extend_allocation);
1717
1718TRACE_EVENT(ocfs2_init_xattr_set_ctxt,
1719 TP_PROTO(const char *name, int meta, int clusters, int credits),
1720 TP_ARGS(name, meta, clusters, credits),
1721 TP_STRUCT__entry(
1722 __string(name, name)
1723 __field(int, meta)
1724 __field(int, clusters)
1725 __field(int, credits)
1726 ),
1727 TP_fast_assign(
1728 __assign_str(name, name);
1729 __entry->meta = meta;
1730 __entry->clusters = clusters;
1731 __entry->credits = credits;
1732 ),
1733 TP_printk("%s %d %d %d", __get_str(name), __entry->meta,
1734 __entry->clusters, __entry->credits)
1735);
1736
1737DECLARE_EVENT_CLASS(ocfs2__xattr_find,
1738 TP_PROTO(unsigned long long ino, const char *name, int name_index,
1739 unsigned int hash, unsigned long long location,
1740 int xe_index),
1741 TP_ARGS(ino, name, name_index, hash, location, xe_index),
1742 TP_STRUCT__entry(
1743 __field(unsigned long long, ino)
1744 __string(name, name)
1745 __field(int, name_index)
1746 __field(unsigned int, hash)
1747 __field(unsigned long long, location)
1748 __field(int, xe_index)
1749 ),
1750 TP_fast_assign(
1751 __entry->ino = ino;
1752 __assign_str(name, name);
1753 __entry->name_index = name_index;
1754 __entry->hash = hash;
1755 __entry->location = location;
1756 __entry->xe_index = xe_index;
1757 ),
1758 TP_printk("%llu %s %d %u %llu %d", __entry->ino, __get_str(name),
1759 __entry->name_index, __entry->hash, __entry->location,
1760 __entry->xe_index)
1761);
1762
1763#define DEFINE_OCFS2_XATTR_FIND_EVENT(name) \
1764DEFINE_EVENT(ocfs2__xattr_find, name, \
1765TP_PROTO(unsigned long long ino, const char *name, int name_index, \
1766 unsigned int hash, unsigned long long bucket, \
1767 int xe_index), \
1768 TP_ARGS(ino, name, name_index, hash, bucket, xe_index))
1769
1770DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_bucket_find);
1771
1772DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find);
1773
1774DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find_rec);
1775
1776DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_iterate_xattr_buckets);
1777
1778DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_iterate_xattr_bucket);
1779
1780DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cp_xattr_block_to_bucket_begin);
1781
1782DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cp_xattr_block_to_bucket_end);
1783
1784DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block_begin);
1785
1786DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block);
1787
1788DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_defrag_xattr_bucket);
1789
1790DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_bucket_cross_cluster);
1791
1792DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_divide_xattr_bucket_begin);
1793
1794DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_divide_xattr_bucket_move);
1795
1796DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_cp_xattr_bucket);
1797
1798DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_buckets);
1799
1800DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_adjust_xattr_cross_cluster);
1801
1802DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_begin);
1803
1804DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_add_new_xattr_cluster);
1805
1806DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_insert);
1807
1808DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_extend_xattr_bucket);
1809
1810DEFINE_OCFS2_ULL_EVENT(ocfs2_add_new_xattr_bucket);
1811
1812DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_xattr_bucket_value_truncate);
1813
1814DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_rm_xattr_cluster);
1815
1816DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_header);
1817
1818DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_create_empty_xattr_block);
1819
1820DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_bucket);
1821
1822DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_index_block);
1823
1824DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_xattr_bucket_value_refcount);
1825
1826DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_reflink_xattr_buckets);
1827
1828DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_rec);
1829
1830/* End of trace events for fs/ocfs2/xattr.c. */
1831
1832/* Trace events for fs/ocfs2/reservations.c. */
1833
1834DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_insert);
1835
1836DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_begin);
1837
1838DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_end);
1839
1840TRACE_EVENT(ocfs2_resv_find_window_begin,
1841 TP_PROTO(unsigned int r_start, unsigned int r_end, unsigned int goal,
1842 unsigned int wanted, int empty_root),
1843 TP_ARGS(r_start, r_end, goal, wanted, empty_root),
1844 TP_STRUCT__entry(
1845 __field(unsigned int, r_start)
1846 __field(unsigned int, r_end)
1847 __field(unsigned int, goal)
1848 __field(unsigned int, wanted)
1849 __field(int, empty_root)
1850 ),
1851 TP_fast_assign(
1852 __entry->r_start = r_start;
1853 __entry->r_end = r_end;
1854 __entry->goal = goal;
1855 __entry->wanted = wanted;
1856 __entry->empty_root = empty_root;
1857 ),
1858 TP_printk("%u %u %u %u %d", __entry->r_start, __entry->r_end,
1859 __entry->goal, __entry->wanted, __entry->empty_root)
1860);
1861
1862DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_find_window_prev);
1863
1864DEFINE_OCFS2_INT_INT_EVENT(ocfs2_resv_find_window_next);
1865
1866DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cannibalize_resv_begin);
1867
1868TRACE_EVENT(ocfs2_cannibalize_resv_end,
1869 TP_PROTO(unsigned int start, unsigned int end, unsigned int len,
1870 unsigned int last_start, unsigned int last_len),
1871 TP_ARGS(start, end, len, last_start, last_len),
1872 TP_STRUCT__entry(
1873 __field(unsigned int, start)
1874 __field(unsigned int, end)
1875 __field(unsigned int, len)
1876 __field(unsigned int, last_start)
1877 __field(unsigned int, last_len)
1878 ),
1879 TP_fast_assign(
1880 __entry->start = start;
1881 __entry->end = end;
1882 __entry->len = len;
1883 __entry->last_start = last_start;
1884 __entry->last_len = last_len;
1885 ),
1886 TP_printk("%u %u %u %u %u", __entry->start, __entry->end,
1887 __entry->len, __entry->last_start, __entry->last_len)
1888);
1889
1890DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_resv_bits);
1891
1892TRACE_EVENT(ocfs2_resmap_claimed_bits_begin,
1893 TP_PROTO(unsigned int cstart, unsigned int cend, unsigned int clen,
1894 unsigned int r_start, unsigned int r_end, unsigned int r_len,
1895 unsigned int last_start, unsigned int last_len),
1896 TP_ARGS(cstart, cend, clen, r_start, r_end,
1897 r_len, last_start, last_len),
1898 TP_STRUCT__entry(
1899 __field(unsigned int, cstart)
1900 __field(unsigned int, cend)
1901 __field(unsigned int, clen)
1902 __field(unsigned int, r_start)
1903 __field(unsigned int, r_end)
1904 __field(unsigned int, r_len)
1905 __field(unsigned int, last_start)
1906 __field(unsigned int, last_len)
1907 ),
1908 TP_fast_assign(
1909 __entry->cstart = cstart;
1910 __entry->cend = cend;
1911 __entry->clen = clen;
1912 __entry->r_start = r_start;
1913 __entry->r_end = r_end;
1914 __entry->r_len = r_len;
1915 __entry->last_start = last_start;
1916 __entry->last_len = last_len;
1917 ),
1918 TP_printk("%u %u %u %u %u %u %u %u",
1919 __entry->cstart, __entry->cend, __entry->clen,
1920 __entry->r_start, __entry->r_end, __entry->r_len,
1921 __entry->last_start, __entry->last_len)
1922);
1923
1924TRACE_EVENT(ocfs2_resmap_claimed_bits_end,
1925 TP_PROTO(unsigned int start, unsigned int end, unsigned int len,
1926 unsigned int last_start, unsigned int last_len),
1927 TP_ARGS(start, end, len, last_start, last_len),
1928 TP_STRUCT__entry(
1929 __field(unsigned int, start)
1930 __field(unsigned int, end)
1931 __field(unsigned int, len)
1932 __field(unsigned int, last_start)
1933 __field(unsigned int, last_len)
1934 ),
1935 TP_fast_assign(
1936 __entry->start = start;
1937 __entry->end = end;
1938 __entry->len = len;
1939 __entry->last_start = last_start;
1940 __entry->last_len = last_len;
1941 ),
1942 TP_printk("%u %u %u %u %u", __entry->start, __entry->end,
1943 __entry->len, __entry->last_start, __entry->last_len)
1944);
1945
1946/* End of trace events for fs/ocfs2/reservations.c. */
1947
1948/* Trace events for fs/ocfs2/quota_local.c. */
1949
1950DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_recover_local_quota_file);
1951
1952DEFINE_OCFS2_INT_EVENT(ocfs2_finish_quota_recovery);
1953
1954DEFINE_OCFS2_ULL_ULL_UINT_EVENT(olq_set_dquot);
1955
1956/* End of trace events for fs/ocfs2/quota_local.c. */
1957
1958/* Trace events for fs/ocfs2/quota_global.c. */
1959
1960DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_quota_block);
1961
1962TRACE_EVENT(ocfs2_sync_dquot,
1963 TP_PROTO(unsigned int dq_id, long long dqb_curspace,
1964 long long spacechange, long long curinodes,
1965 long long inodechange),
1966 TP_ARGS(dq_id, dqb_curspace, spacechange, curinodes, inodechange),
1967 TP_STRUCT__entry(
1968 __field(unsigned int, dq_id)
1969 __field(long long, dqb_curspace)
1970 __field(long long, spacechange)
1971 __field(long long, curinodes)
1972 __field(long long, inodechange)
1973 ),
1974 TP_fast_assign(
1975 __entry->dq_id = dq_id;
1976 __entry->dqb_curspace = dqb_curspace;
1977 __entry->spacechange = spacechange;
1978 __entry->curinodes = curinodes;
1979 __entry->inodechange = inodechange;
1980 ),
1981 TP_printk("%u %lld %lld %lld %lld", __entry->dq_id,
1982 __entry->dqb_curspace, __entry->spacechange,
1983 __entry->curinodes, __entry->inodechange)
1984);
1985
1986TRACE_EVENT(ocfs2_sync_dquot_helper,
1987 TP_PROTO(unsigned int dq_id, unsigned int dq_type, unsigned long type,
1988 const char *s_id),
1989 TP_ARGS(dq_id, dq_type, type, s_id),
1990
1991 TP_STRUCT__entry(
1992 __field(unsigned int, dq_id)
1993 __field(unsigned int, dq_type)
1994 __field(unsigned long, type)
1995 __string(s_id, s_id)
1996 ),
1997 TP_fast_assign(
1998 __entry->dq_id = dq_id;
1999 __entry->dq_type = dq_type;
2000 __entry->type = type;
2001 __assign_str(s_id, s_id);
2002 ),
2003 TP_printk("%u %u %lu %s", __entry->dq_id, __entry->dq_type,
2004 __entry->type, __get_str(s_id))
2005);
2006
2007DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_write_dquot);
2008
2009DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_release_dquot);
2010
2011DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_acquire_dquot);
2012
2013DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_mark_dquot_dirty);
2014
2015/* End of trace events for fs/ocfs2/quota_global.c. */
2016
2017/* Trace events for fs/ocfs2/dir.c. */
2018DEFINE_OCFS2_INT_EVENT(ocfs2_search_dirblock);
2019
2020DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_dir_block);
2021
2022DEFINE_OCFS2_POINTER_EVENT(ocfs2_find_entry_el);
2023
2024TRACE_EVENT(ocfs2_dx_dir_search,
2025 TP_PROTO(unsigned long long ino, int namelen, const char *name,
2026 unsigned int major_hash, unsigned int minor_hash,
2027 unsigned long long blkno),
2028 TP_ARGS(ino, namelen, name, major_hash, minor_hash, blkno),
2029 TP_STRUCT__entry(
2030 __field(unsigned long long, ino)
2031 __field(int, namelen)
2032 __string(name, name)
2033 __field(unsigned int, major_hash)
2034 __field(unsigned int,minor_hash)
2035 __field(unsigned long long, blkno)
2036 ),
2037 TP_fast_assign(
2038 __entry->ino = ino;
2039 __entry->namelen = namelen;
2040 __assign_str(name, name);
2041 __entry->major_hash = major_hash;
2042 __entry->minor_hash = minor_hash;
2043 __entry->blkno = blkno;
2044 ),
2045 TP_printk("%llu %.*s %u %u %llu", __entry->ino,
2046 __entry->namelen, __get_str(name),
2047 __entry->major_hash, __entry->minor_hash, __entry->blkno)
2048);
2049
2050DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_dx_dir_search_leaf_info);
2051
2052DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_delete_entry_dx);
2053
2054DEFINE_OCFS2_ULL_EVENT(ocfs2_readdir);
2055
2056TRACE_EVENT(ocfs2_find_files_on_disk,
2057 TP_PROTO(int namelen, const char *name, void *blkno,
2058 unsigned long long dir),
2059 TP_ARGS(namelen, name, blkno, dir),
2060 TP_STRUCT__entry(
2061 __field(int, namelen)
2062 __string(name, name)
2063 __field(void *, blkno)
2064 __field(unsigned long long, dir)
2065 ),
2066 TP_fast_assign(
2067 __entry->namelen = namelen;
2068 __assign_str(name, name);
2069 __entry->blkno = blkno;
2070 __entry->dir = dir;
2071 ),
2072 TP_printk("%.*s %p %llu", __entry->namelen, __get_str(name),
2073 __entry->blkno, __entry->dir)
2074);
2075
2076TRACE_EVENT(ocfs2_check_dir_for_entry,
2077 TP_PROTO(unsigned long long dir, int namelen, const char *name),
2078 TP_ARGS(dir, namelen, name),
2079 TP_STRUCT__entry(
2080 __field(unsigned long long, dir)
2081 __field(int, namelen)
2082 __string(name, name)
2083 ),
2084 TP_fast_assign(
2085 __entry->dir = dir;
2086 __entry->namelen = namelen;
2087 __assign_str(name, name);
2088 ),
2089 TP_printk("%llu %.*s", __entry->dir,
2090 __entry->namelen, __get_str(name))
2091);
2092
2093DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_dx_dir_attach_index);
2094
2095DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_format_cluster);
2096
2097TRACE_EVENT(ocfs2_dx_dir_index_root_block,
2098 TP_PROTO(unsigned long long dir,
2099 unsigned int major_hash, unsigned int minor_hash,
2100 int namelen, const char *name, unsigned int num_used),
2101 TP_ARGS(dir, major_hash, minor_hash, namelen, name, num_used),
2102 TP_STRUCT__entry(
2103 __field(unsigned long long, dir)
2104 __field(unsigned int, major_hash)
2105 __field(unsigned int, minor_hash)
2106 __field(int, namelen)
2107 __string(name, name)
2108 __field(unsigned int, num_used)
2109 ),
2110 TP_fast_assign(
2111 __entry->dir = dir;
2112 __entry->major_hash = major_hash;
2113 __entry->minor_hash = minor_hash;
2114 __entry->namelen = namelen;
2115 __assign_str(name, name);
2116 __entry->num_used = num_used;
2117 ),
2118 TP_printk("%llu %x %x %.*s %u", __entry->dir,
2119 __entry->major_hash, __entry->minor_hash,
2120 __entry->namelen, __get_str(name), __entry->num_used)
2121);
2122
2123DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_extend_dir);
2124
2125DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_rebalance);
2126
2127DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_dx_dir_rebalance_split);
2128
2129DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_prepare_dir_for_insert);
2130
2131/* End of trace events for fs/ocfs2/dir.c. */
2132
2133/* Trace events for fs/ocfs2/namei.c. */
2134
2135DECLARE_EVENT_CLASS(ocfs2__dentry_ops,
2136 TP_PROTO(void *dir, void *dentry, int name_len, const char *name,
2137 unsigned long long dir_blkno, unsigned long long extra),
2138 TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra),
2139 TP_STRUCT__entry(
2140 __field(void *, dir)
2141 __field(void *, dentry)
2142 __field(int, name_len)
2143 __string(name, name)
2144 __field(unsigned long long, dir_blkno)
2145 __field(unsigned long long, extra)
2146 ),
2147 TP_fast_assign(
2148 __entry->dir = dir;
2149 __entry->dentry = dentry;
2150 __entry->name_len = name_len;
2151 __assign_str(name, name);
2152 __entry->dir_blkno = dir_blkno;
2153 __entry->extra = extra;
2154 ),
2155 TP_printk("%p %p %.*s %llu %llu", __entry->dir, __entry->dentry,
2156 __entry->name_len, __get_str(name),
2157 __entry->dir_blkno, __entry->extra)
2158);
2159
2160#define DEFINE_OCFS2_DENTRY_OPS(name) \
2161DEFINE_EVENT(ocfs2__dentry_ops, name, \
2162TP_PROTO(void *dir, void *dentry, int name_len, const char *name, \
2163 unsigned long long dir_blkno, unsigned long long extra), \
2164 TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra))
2165
2166DEFINE_OCFS2_DENTRY_OPS(ocfs2_lookup);
2167
2168DEFINE_OCFS2_DENTRY_OPS(ocfs2_mkdir);
2169
2170DEFINE_OCFS2_DENTRY_OPS(ocfs2_create);
2171
2172DEFINE_OCFS2_DENTRY_OPS(ocfs2_unlink);
2173
2174DEFINE_OCFS2_DENTRY_OPS(ocfs2_symlink_create);
2175
2176DEFINE_OCFS2_DENTRY_OPS(ocfs2_mv_orphaned_inode_to_new);
2177
2178DEFINE_OCFS2_POINTER_EVENT(ocfs2_lookup_ret);
2179
2180TRACE_EVENT(ocfs2_mknod,
2181 TP_PROTO(void *dir, void *dentry, int name_len, const char *name,
2182 unsigned long long dir_blkno, unsigned long dev, int mode),
2183 TP_ARGS(dir, dentry, name_len, name, dir_blkno, dev, mode),
2184 TP_STRUCT__entry(
2185 __field(void *, dir)
2186 __field(void *, dentry)
2187 __field(int, name_len)
2188 __string(name, name)
2189 __field(unsigned long long, dir_blkno)
2190 __field(unsigned long, dev)
2191 __field(int, mode)
2192 ),
2193 TP_fast_assign(
2194 __entry->dir = dir;
2195 __entry->dentry = dentry;
2196 __entry->name_len = name_len;
2197 __assign_str(name, name);
2198 __entry->dir_blkno = dir_blkno;
2199 __entry->dev = dev;
2200 __entry->mode = mode;
2201 ),
2202 TP_printk("%p %p %.*s %llu %lu %d", __entry->dir, __entry->dentry,
2203 __entry->name_len, __get_str(name),
2204 __entry->dir_blkno, __entry->dev, __entry->mode)
2205);
2206
2207TRACE_EVENT(ocfs2_link,
2208 TP_PROTO(unsigned long long ino, int old_len, const char *old_name,
2209 int name_len, const char *name),
2210 TP_ARGS(ino, old_len, old_name, name_len, name),
2211 TP_STRUCT__entry(
2212 __field(unsigned long long, ino)
2213 __field(int, old_len)
2214 __string(old_name, old_name)
2215 __field(int, name_len)
2216 __string(name, name)
2217 ),
2218 TP_fast_assign(
2219 __entry->ino = ino;
2220 __entry->old_len = old_len;
2221 __assign_str(old_name, old_name);
2222 __entry->name_len = name_len;
2223 __assign_str(name, name);
2224 ),
2225 TP_printk("%llu %.*s %.*s", __entry->ino,
2226 __entry->old_len, __get_str(old_name),
2227 __entry->name_len, __get_str(name))
2228);
2229
2230DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_unlink_noent);
2231
2232DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock);
2233
2234DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock_end);
2235
2236TRACE_EVENT(ocfs2_rename,
2237 TP_PROTO(void *old_dir, void *old_dentry,
2238 void *new_dir, void *new_dentry,
2239 int old_len, const char *old_name,
2240 int new_len, const char *new_name),
2241 TP_ARGS(old_dir, old_dentry, new_dir, new_dentry,
2242 old_len, old_name, new_len, new_name),
2243 TP_STRUCT__entry(
2244 __field(void *, old_dir)
2245 __field(void *, old_dentry)
2246 __field(void *, new_dir)
2247 __field(void *, new_dentry)
2248 __field(int, old_len)
2249 __string(old_name, old_name)
2250 __field(int, new_len)
2251 __string(new_name, new_name)
2252 ),
2253 TP_fast_assign(
2254 __entry->old_dir = old_dir;
2255 __entry->old_dentry = old_dentry;
2256 __entry->new_dir = new_dir;
2257 __entry->new_dentry = new_dentry;
2258 __entry->old_len = old_len;
2259 __assign_str(old_name, old_name);
2260 __entry->new_len = new_len;
2261 __assign_str(new_name, new_name);
2262 ),
2263 TP_printk("%p %p %p %p %.*s %.*s",
2264 __entry->old_dir, __entry->old_dentry,
2265 __entry->new_dir, __entry->new_dentry,
2266 __entry->old_len, __get_str(old_name),
2267 __entry->new_len, __get_str(new_name))
2268);
2269
2270TRACE_EVENT(ocfs2_rename_target_exists,
2271 TP_PROTO(int new_len, const char *new_name),
2272 TP_ARGS(new_len, new_name),
2273 TP_STRUCT__entry(
2274 __field(int, new_len)
2275 __string(new_name, new_name)
2276 ),
2277 TP_fast_assign(
2278 __entry->new_len = new_len;
2279 __assign_str(new_name, new_name);
2280 ),
2281 TP_printk("%.*s", __entry->new_len, __get_str(new_name))
2282);
2283
2284DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_rename_disagree);
2285
2286TRACE_EVENT(ocfs2_rename_over_existing,
2287 TP_PROTO(unsigned long long new_blkno, void *new_bh,
2288 unsigned long long newdi_blkno),
2289 TP_ARGS(new_blkno, new_bh, newdi_blkno),
2290 TP_STRUCT__entry(
2291 __field(unsigned long long, new_blkno)
2292 __field(void *, new_bh)
2293 __field(unsigned long long, newdi_blkno)
2294 ),
2295 TP_fast_assign(
2296 __entry->new_blkno = new_blkno;
2297 __entry->new_bh = new_bh;
2298 __entry->newdi_blkno = newdi_blkno;
2299 ),
2300 TP_printk("%llu %p %llu", __entry->new_blkno, __entry->new_bh,
2301 __entry->newdi_blkno)
2302);
2303
2304DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_create_symlink_data);
2305
2306TRACE_EVENT(ocfs2_symlink_begin,
2307 TP_PROTO(void *dir, void *dentry, const char *symname,
2308 int len, const char *name),
2309 TP_ARGS(dir, dentry, symname, len, name),
2310 TP_STRUCT__entry(
2311 __field(void *, dir)
2312 __field(void *, dentry)
2313 __field(const char *, symname)
2314 __field(int, len)
2315 __string(name, name)
2316 ),
2317 TP_fast_assign(
2318 __entry->dir = dir;
2319 __entry->dentry = dentry;
2320 __entry->symname = symname;
2321 __entry->len = len;
2322 __assign_str(name, name);
2323 ),
2324 TP_printk("%p %p %s %.*s", __entry->dir, __entry->dentry,
2325 __entry->symname, __entry->len, __get_str(name))
2326);
2327
2328TRACE_EVENT(ocfs2_blkno_stringify,
2329 TP_PROTO(unsigned long long blkno, const char *name, int namelen),
2330 TP_ARGS(blkno, name, namelen),
2331 TP_STRUCT__entry(
2332 __field(unsigned long long, blkno)
2333 __string(name, name)
2334 __field(int, namelen)
2335 ),
2336 TP_fast_assign(
2337 __entry->blkno = blkno;
2338 __assign_str(name, name);
2339 __entry->namelen = namelen;
2340 ),
2341 TP_printk("%llu %s %d", __entry->blkno, __get_str(name),
2342 __entry->namelen)
2343);
2344
2345DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_add_begin);
2346
2347DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_orphan_add_end);
2348
2349TRACE_EVENT(ocfs2_orphan_del,
2350 TP_PROTO(unsigned long long dir, const char *name, int namelen),
2351 TP_ARGS(dir, name, namelen),
2352 TP_STRUCT__entry(
2353 __field(unsigned long long, dir)
2354 __string(name, name)
2355 __field(int, namelen)
2356 ),
2357 TP_fast_assign(
2358 __entry->dir = dir;
2359 __assign_str(name, name);
2360 __entry->namelen = namelen;
2361 ),
2362 TP_printk("%llu %s %d", __entry->dir, __get_str(name),
2363 __entry->namelen)
2364);
2365
2366/* End of trace events for fs/ocfs2/namei.c. */
2367
2368/* Trace events for fs/ocfs2/dcache.c. */
2369
2370TRACE_EVENT(ocfs2_dentry_revalidate,
2371 TP_PROTO(void *dentry, int len, const char *name),
2372 TP_ARGS(dentry, len, name),
2373 TP_STRUCT__entry(
2374 __field(void *, dentry)
2375 __field(int, len)
2376 __string(name, name)
2377 ),
2378 TP_fast_assign(
2379 __entry->dentry = dentry;
2380 __entry->len = len;
2381 __assign_str(name, name);
2382 ),
2383 TP_printk("%p %.*s", __entry->dentry, __entry->len, __get_str(name))
2384);
2385
2386TRACE_EVENT(ocfs2_dentry_revalidate_negative,
2387 TP_PROTO(int len, const char *name, unsigned long pgen,
2388 unsigned long gen),
2389 TP_ARGS(len, name, pgen, gen),
2390 TP_STRUCT__entry(
2391 __field(int, len)
2392 __string(name, name)
2393 __field(unsigned long, pgen)
2394 __field(unsigned long, gen)
2395 ),
2396 TP_fast_assign(
2397 __entry->len = len;
2398 __assign_str(name, name);
2399 __entry->pgen = pgen;
2400 __entry->gen = gen;
2401 ),
2402 TP_printk("%.*s %lu %lu", __entry->len, __get_str(name),
2403 __entry->pgen, __entry->gen)
2404);
2405
2406DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_delete);
2407
2408DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_dentry_revalidate_orphaned);
2409
2410DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_nofsdata);
2411
2412DEFINE_OCFS2_INT_EVENT(ocfs2_dentry_revalidate_ret);
2413
2414TRACE_EVENT(ocfs2_find_local_alias,
2415 TP_PROTO(int len, const char *name),
2416 TP_ARGS(len, name),
2417 TP_STRUCT__entry(
2418 __field(int, len)
2419 __string(name, name)
2420 ),
2421 TP_fast_assign(
2422 __entry->len = len;
2423 __assign_str(name, name);
2424 ),
2425 TP_printk("%.*s", __entry->len, __get_str(name))
2426);
2427
2428TRACE_EVENT(ocfs2_dentry_attach_lock,
2429 TP_PROTO(int len, const char *name,
2430 unsigned long long parent, void *fsdata),
2431 TP_ARGS(len, name, parent, fsdata),
2432 TP_STRUCT__entry(
2433 __field(int, len)
2434 __string(name, name)
2435 __field(unsigned long long, parent)
2436 __field(void *, fsdata)
2437 ),
2438 TP_fast_assign(
2439 __entry->len = len;
2440 __assign_str(name, name);
2441 __entry->parent = parent;
2442 __entry->fsdata = fsdata;
2443 ),
2444 TP_printk("%.*s %llu %p", __entry->len, __get_str(name),
2445 __entry->parent, __entry->fsdata)
2446);
2447
2448TRACE_EVENT(ocfs2_dentry_attach_lock_found,
2449 TP_PROTO(const char *name, unsigned long long parent,
2450 unsigned long long ino),
2451 TP_ARGS(name, parent, ino),
2452 TP_STRUCT__entry(
2453 __string(name, name)
2454 __field(unsigned long long, parent)
2455 __field(unsigned long long, ino)
2456 ),
2457 TP_fast_assign(
2458 __assign_str(name, name);
2459 __entry->parent = parent;
2460 __entry->ino = ino;
2461 ),
2462 TP_printk("%s %llu %llu", __get_str(name), __entry->parent, __entry->ino)
2463);
2464/* End of trace events for fs/ocfs2/dcache.c. */
2465
2466/* Trace events for fs/ocfs2/export.c. */
2467
2468TRACE_EVENT(ocfs2_get_dentry_begin,
2469 TP_PROTO(void *sb, void *handle, unsigned long long blkno),
2470 TP_ARGS(sb, handle, blkno),
2471 TP_STRUCT__entry(
2472 __field(void *, sb)
2473 __field(void *, handle)
2474 __field(unsigned long long, blkno)
2475 ),
2476 TP_fast_assign(
2477 __entry->sb = sb;
2478 __entry->handle = handle;
2479 __entry->blkno = blkno;
2480 ),
2481 TP_printk("%p %p %llu", __entry->sb, __entry->handle, __entry->blkno)
2482);
2483
2484DEFINE_OCFS2_INT_INT_EVENT(ocfs2_get_dentry_test_bit);
2485
2486DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_get_dentry_stale);
2487
2488DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_get_dentry_generation);
2489
2490DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_dentry_end);
2491
2492TRACE_EVENT(ocfs2_get_parent,
2493 TP_PROTO(void *child, int len, const char *name,
2494 unsigned long long ino),
2495 TP_ARGS(child, len, name, ino),
2496 TP_STRUCT__entry(
2497 __field(void *, child)
2498 __field(int, len)
2499 __string(name, name)
2500 __field(unsigned long long, ino)
2501 ),
2502 TP_fast_assign(
2503 __entry->child = child;
2504 __entry->len = len;
2505 __assign_str(name, name);
2506 __entry->ino = ino;
2507 ),
2508 TP_printk("%p %.*s %llu", __entry->child, __entry->len,
2509 __get_str(name), __entry->ino)
2510);
2511
2512DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_parent_end);
2513
2514TRACE_EVENT(ocfs2_encode_fh_begin,
2515 TP_PROTO(void *dentry, int name_len, const char *name,
2516 void *fh, int len, int connectable),
2517 TP_ARGS(dentry, name_len, name, fh, len, connectable),
2518 TP_STRUCT__entry(
2519 __field(void *, dentry)
2520 __field(int, name_len)
2521 __string(name, name)
2522 __field(void *, fh)
2523 __field(int, len)
2524 __field(int, connectable)
2525 ),
2526 TP_fast_assign(
2527 __entry->dentry = dentry;
2528 __entry->name_len = name_len;
2529 __assign_str(name, name);
2530 __entry->fh = fh;
2531 __entry->len = len;
2532 __entry->connectable = connectable;
2533 ),
2534 TP_printk("%p %.*s %p %d %d", __entry->dentry, __entry->name_len,
2535 __get_str(name), __entry->fh, __entry->len,
2536 __entry->connectable)
2537);
2538
2539DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_self);
2540
2541DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_parent);
2542
2543DEFINE_OCFS2_INT_EVENT(ocfs2_encode_fh_type);
2544
2545/* End of trace events for fs/ocfs2/export.c. */
2546
2547/* Trace events for fs/ocfs2/journal.c. */
2548
2549DEFINE_OCFS2_UINT_EVENT(ocfs2_commit_cache_begin);
2550
2551DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end);
2552
2553DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
2554
2555DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
2556
2557DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access);
2558
2559DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty);
2560
2561DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_journal_init);
2562
2563DEFINE_OCFS2_UINT_EVENT(ocfs2_journal_init_maxlen);
2564
2565DEFINE_OCFS2_INT_EVENT(ocfs2_journal_shutdown);
2566
2567DEFINE_OCFS2_POINTER_EVENT(ocfs2_journal_shutdown_wait);
2568
2569DEFINE_OCFS2_ULL_EVENT(ocfs2_complete_recovery);
2570
2571DEFINE_OCFS2_INT_EVENT(ocfs2_complete_recovery_end);
2572
2573TRACE_EVENT(ocfs2_complete_recovery_slot,
2574 TP_PROTO(int slot, unsigned long long la_ino,
2575 unsigned long long tl_ino, void *qrec),
2576 TP_ARGS(slot, la_ino, tl_ino, qrec),
2577 TP_STRUCT__entry(
2578 __field(int, slot)
2579 __field(unsigned long long, la_ino)
2580 __field(unsigned long long, tl_ino)
2581 __field(void *, qrec)
2582 ),
2583 TP_fast_assign(
2584 __entry->slot = slot;
2585 __entry->la_ino = la_ino;
2586 __entry->tl_ino = tl_ino;
2587 __entry->qrec = qrec;
2588 ),
2589 TP_printk("%d %llu %llu %p", __entry->slot, __entry->la_ino,
2590 __entry->tl_ino, __entry->qrec)
2591);
2592
2593DEFINE_OCFS2_INT_INT_EVENT(ocfs2_recovery_thread_node);
2594
2595DEFINE_OCFS2_INT_EVENT(ocfs2_recovery_thread_end);
2596
2597TRACE_EVENT(ocfs2_recovery_thread,
2598 TP_PROTO(int node_num, int osb_node_num, int disable,
2599 void *recovery_thread, int map_set),
2600 TP_ARGS(node_num, osb_node_num, disable, recovery_thread, map_set),
2601 TP_STRUCT__entry(
2602 __field(int, node_num)
2603 __field(int, osb_node_num)
2604 __field(int,disable)
2605 __field(void *, recovery_thread)
2606 __field(int,map_set)
2607 ),
2608 TP_fast_assign(
2609 __entry->node_num = node_num;
2610 __entry->osb_node_num = osb_node_num;
2611 __entry->disable = disable;
2612 __entry->recovery_thread = recovery_thread;
2613 __entry->map_set = map_set;
2614 ),
2615 TP_printk("%d %d %d %p %d", __entry->node_num,
2616 __entry->osb_node_num, __entry->disable,
2617 __entry->recovery_thread, __entry->map_set)
2618);
2619
2620DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_replay_journal_recovered);
2621
2622DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_lock_err);
2623
2624DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_skip);
2625
2626DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_recover_node);
2627
2628DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_recover_node_skip);
2629
2630DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_mark_dead_nodes);
2631
2632DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_begin);
2633
2634DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_end);
2635
2636DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_filldir);
2637
2638DEFINE_OCFS2_INT_EVENT(ocfs2_recover_orphans);
2639
2640DEFINE_OCFS2_ULL_EVENT(ocfs2_recover_orphans_iput);
2641
2642DEFINE_OCFS2_INT_EVENT(ocfs2_wait_on_mount);
2643
2644/* End of trace events for fs/ocfs2/journal.c. */
2645
2646/* Trace events for fs/ocfs2/buffer_head_io.c. */
2647
2648DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_read_blocks_sync);
2649
2650DEFINE_OCFS2_ULL_EVENT(ocfs2_read_blocks_sync_jbd);
2651
2652DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_read_blocks_from_disk);
2653
2654DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_bh);
2655
2656DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_end);
2657
2658TRACE_EVENT(ocfs2_write_block,
2659 TP_PROTO(unsigned long long block, void *ci),
2660 TP_ARGS(block, ci),
2661 TP_STRUCT__entry(
2662 __field(unsigned long long, block)
2663 __field(void *, ci)
2664 ),
2665 TP_fast_assign(
2666 __entry->block = block;
2667 __entry->ci = ci;
2668 ),
2669 TP_printk("%llu %p", __entry->block, __entry->ci)
2670);
2671
2672TRACE_EVENT(ocfs2_read_blocks_begin,
2673 TP_PROTO(void *ci, unsigned long long block,
2674 unsigned int nr, int flags),
2675 TP_ARGS(ci, block, nr, flags),
2676 TP_STRUCT__entry(
2677 __field(void *, ci)
2678 __field(unsigned long long, block)
2679 __field(unsigned int, nr)
2680 __field(int, flags)
2681 ),
2682 TP_fast_assign(
2683 __entry->ci = ci;
2684 __entry->block = block;
2685 __entry->nr = nr;
2686 __entry->flags = flags;
2687 ),
2688 TP_printk("%p %llu %u %d", __entry->ci, __entry->block,
2689 __entry->nr, __entry->flags)
2690);
2691
2692/* End of trace events for fs/ocfs2/buffer_head_io.c. */
2693
2694/* Trace events for fs/ocfs2/uptodate.c. */
2695
2696DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_copied_metadata_tree);
2697
2698DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_metadata_cache_purge);
2699
2700DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_buffer_cached_begin);
2701
2702TRACE_EVENT(ocfs2_buffer_cached_end,
2703 TP_PROTO(int index, void *item),
2704 TP_ARGS(index, item),
2705 TP_STRUCT__entry(
2706 __field(int, index)
2707 __field(void *, item)
2708 ),
2709 TP_fast_assign(
2710 __entry->index = index;
2711 __entry->item = item;
2712 ),
2713 TP_printk("%d %p", __entry->index, __entry->item)
2714);
2715
2716DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_append_cache_array);
2717
2718DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_insert_cache_tree);
2719
2720DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_expand_cache);
2721
2722DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_set_buffer_uptodate);
2723
2724DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_set_buffer_uptodate_begin);
2725
2726DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_remove_metadata_array);
2727
2728DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_remove_metadata_tree);
2729
2730DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_remove_block_from_cache);
2731
2732/* End of trace events for fs/ocfs2/uptodate.c. */
2733#endif /* _TRACE_OCFS2_H */
2734
2735/* This part must be outside protection */
2736#undef TRACE_INCLUDE_PATH
2737#define TRACE_INCLUDE_PATH .
2738#define TRACE_INCLUDE_FILE ocfs2_trace
2739#include <trace/define_trace.h>
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index a73f64166481..279aef68025b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -11,7 +11,6 @@
11#include <linux/writeback.h> 11#include <linux/writeback.h>
12#include <linux/workqueue.h> 12#include <linux/workqueue.h>
13 13
14#define MLOG_MASK_PREFIX ML_QUOTA
15#include <cluster/masklog.h> 14#include <cluster/masklog.h>
16 15
17#include "ocfs2_fs.h" 16#include "ocfs2_fs.h"
@@ -27,6 +26,7 @@
27#include "super.h" 26#include "super.h"
28#include "buffer_head_io.h" 27#include "buffer_head_io.h"
29#include "quota.h" 28#include "quota.h"
29#include "ocfs2_trace.h"
30 30
31/* 31/*
32 * Locking of quotas with OCFS2 is rather complex. Here are rules that 32 * Locking of quotas with OCFS2 is rather complex. Here are rules that
@@ -130,8 +130,7 @@ int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh)
130 struct ocfs2_disk_dqtrailer *dqt = 130 struct ocfs2_disk_dqtrailer *dqt =
131 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); 131 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data);
132 132
133 mlog(0, "Validating quota block %llu\n", 133 trace_ocfs2_validate_quota_block((unsigned long long)bh->b_blocknr);
134 (unsigned long long)bh->b_blocknr);
135 134
136 BUG_ON(!buffer_uptodate(bh)); 135 BUG_ON(!buffer_uptodate(bh));
137 136
@@ -341,8 +340,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
341 u64 pcount; 340 u64 pcount;
342 int status; 341 int status;
343 342
344 mlog_entry_void();
345
346 /* Read global header */ 343 /* Read global header */
347 gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], 344 gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
348 OCFS2_INVALID_SLOT); 345 OCFS2_INVALID_SLOT);
@@ -402,7 +399,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
402 msecs_to_jiffies(oinfo->dqi_syncms)); 399 msecs_to_jiffies(oinfo->dqi_syncms));
403 400
404out_err: 401out_err:
405 mlog_exit(status); 402 if (status)
403 mlog_errno(status);
406 return status; 404 return status;
407out_unlock: 405out_unlock:
408 ocfs2_unlock_global_qf(oinfo, 0); 406 ocfs2_unlock_global_qf(oinfo, 0);
@@ -508,9 +506,10 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
508 olditime = dquot->dq_dqb.dqb_itime; 506 olditime = dquot->dq_dqb.dqb_itime;
509 oldbtime = dquot->dq_dqb.dqb_btime; 507 oldbtime = dquot->dq_dqb.dqb_btime;
510 ocfs2_global_disk2memdqb(dquot, &dqblk); 508 ocfs2_global_disk2memdqb(dquot, &dqblk);
511 mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n", 509 trace_ocfs2_sync_dquot(dquot->dq_id, dquot->dq_dqb.dqb_curspace,
512 dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange, 510 (long long)spacechange,
513 dquot->dq_dqb.dqb_curinodes, (long long)inodechange); 511 dquot->dq_dqb.dqb_curinodes,
512 (long long)inodechange);
514 if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags)) 513 if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
515 dquot->dq_dqb.dqb_curspace += spacechange; 514 dquot->dq_dqb.dqb_curspace += spacechange;
516 if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags)) 515 if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
@@ -594,8 +593,8 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
594 struct ocfs2_super *osb = OCFS2_SB(sb); 593 struct ocfs2_super *osb = OCFS2_SB(sb);
595 int status = 0; 594 int status = 0;
596 595
597 mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id, 596 trace_ocfs2_sync_dquot_helper(dquot->dq_id, dquot->dq_type,
598 dquot->dq_type, type, sb->s_id); 597 type, sb->s_id);
599 if (type != dquot->dq_type) 598 if (type != dquot->dq_type)
600 goto out; 599 goto out;
601 status = ocfs2_lock_global_qf(oinfo, 1); 600 status = ocfs2_lock_global_qf(oinfo, 1);
@@ -621,7 +620,6 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
621out_ilock: 620out_ilock:
622 ocfs2_unlock_global_qf(oinfo, 1); 621 ocfs2_unlock_global_qf(oinfo, 1);
623out: 622out:
624 mlog_exit(status);
625 return status; 623 return status;
626} 624}
627 625
@@ -647,7 +645,7 @@ static int ocfs2_write_dquot(struct dquot *dquot)
647 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); 645 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
648 int status = 0; 646 int status = 0;
649 647
650 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 648 trace_ocfs2_write_dquot(dquot->dq_id, dquot->dq_type);
651 649
652 handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS); 650 handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
653 if (IS_ERR(handle)) { 651 if (IS_ERR(handle)) {
@@ -660,7 +658,6 @@ static int ocfs2_write_dquot(struct dquot *dquot)
660 mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex); 658 mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
661 ocfs2_commit_trans(osb, handle); 659 ocfs2_commit_trans(osb, handle);
662out: 660out:
663 mlog_exit(status);
664 return status; 661 return status;
665} 662}
666 663
@@ -686,7 +683,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
686 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); 683 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
687 int status = 0; 684 int status = 0;
688 685
689 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 686 trace_ocfs2_release_dquot(dquot->dq_id, dquot->dq_type);
690 687
691 mutex_lock(&dquot->dq_lock); 688 mutex_lock(&dquot->dq_lock);
692 /* Check whether we are not racing with some other dqget() */ 689 /* Check whether we are not racing with some other dqget() */
@@ -722,7 +719,8 @@ out_ilock:
722 ocfs2_unlock_global_qf(oinfo, 1); 719 ocfs2_unlock_global_qf(oinfo, 1);
723out: 720out:
724 mutex_unlock(&dquot->dq_lock); 721 mutex_unlock(&dquot->dq_lock);
725 mlog_exit(status); 722 if (status)
723 mlog_errno(status);
726 return status; 724 return status;
727} 725}
728 726
@@ -743,7 +741,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
743 int need_alloc = ocfs2_global_qinit_alloc(sb, type); 741 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
744 handle_t *handle; 742 handle_t *handle;
745 743
746 mlog_entry("id=%u, type=%d", dquot->dq_id, type); 744 trace_ocfs2_acquire_dquot(dquot->dq_id, type);
747 mutex_lock(&dquot->dq_lock); 745 mutex_lock(&dquot->dq_lock);
748 /* 746 /*
749 * We need an exclusive lock, because we're going to update use count 747 * We need an exclusive lock, because we're going to update use count
@@ -809,7 +807,8 @@ out_dq:
809 set_bit(DQ_ACTIVE_B, &dquot->dq_flags); 807 set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
810out: 808out:
811 mutex_unlock(&dquot->dq_lock); 809 mutex_unlock(&dquot->dq_lock);
812 mlog_exit(status); 810 if (status)
811 mlog_errno(status);
813 return status; 812 return status;
814} 813}
815 814
@@ -829,7 +828,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
829 handle_t *handle; 828 handle_t *handle;
830 struct ocfs2_super *osb = OCFS2_SB(sb); 829 struct ocfs2_super *osb = OCFS2_SB(sb);
831 830
832 mlog_entry("id=%u, type=%d", dquot->dq_id, type); 831 trace_ocfs2_mark_dquot_dirty(dquot->dq_id, type);
833 832
834 /* In case user set some limits, sync dquot immediately to global 833 /* In case user set some limits, sync dquot immediately to global
835 * quota file so that information propagates quicker */ 834 * quota file so that information propagates quicker */
@@ -866,7 +865,8 @@ out_dlock:
866out_ilock: 865out_ilock:
867 ocfs2_unlock_global_qf(oinfo, 1); 866 ocfs2_unlock_global_qf(oinfo, 1);
868out: 867out:
869 mlog_exit(status); 868 if (status)
869 mlog_errno(status);
870 return status; 870 return status;
871} 871}
872 872
@@ -877,8 +877,6 @@ static int ocfs2_write_info(struct super_block *sb, int type)
877 int status = 0; 877 int status = 0;
878 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; 878 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
879 879
880 mlog_entry_void();
881
882 status = ocfs2_lock_global_qf(oinfo, 1); 880 status = ocfs2_lock_global_qf(oinfo, 1);
883 if (status < 0) 881 if (status < 0)
884 goto out; 882 goto out;
@@ -893,7 +891,8 @@ static int ocfs2_write_info(struct super_block *sb, int type)
893out_ilock: 891out_ilock:
894 ocfs2_unlock_global_qf(oinfo, 1); 892 ocfs2_unlock_global_qf(oinfo, 1);
895out: 893out:
896 mlog_exit(status); 894 if (status)
895 mlog_errno(status);
897 return status; 896 return status;
898} 897}
899 898
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index dc78764ccc4c..dc8007fc9247 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -8,7 +8,6 @@
8#include <linux/quotaops.h> 8#include <linux/quotaops.h>
9#include <linux/module.h> 9#include <linux/module.h>
10 10
11#define MLOG_MASK_PREFIX ML_QUOTA
12#include <cluster/masklog.h> 11#include <cluster/masklog.h>
13 12
14#include "ocfs2_fs.h" 13#include "ocfs2_fs.h"
@@ -23,6 +22,7 @@
23#include "quota.h" 22#include "quota.h"
24#include "uptodate.h" 23#include "uptodate.h"
25#include "super.h" 24#include "super.h"
25#include "ocfs2_trace.h"
26 26
27/* Number of local quota structures per block */ 27/* Number of local quota structures per block */
28static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 28static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -475,7 +475,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
475 struct ocfs2_recovery_chunk *rchunk, *next; 475 struct ocfs2_recovery_chunk *rchunk, *next;
476 qsize_t spacechange, inodechange; 476 qsize_t spacechange, inodechange;
477 477
478 mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); 478 trace_ocfs2_recover_local_quota_file((unsigned long)lqinode->i_ino, type);
479 479
480 list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) { 480 list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
481 chunk = rchunk->rc_chunk; 481 chunk = rchunk->rc_chunk;
@@ -575,7 +575,8 @@ out_put_bh:
575 } 575 }
576 if (status < 0) 576 if (status < 0)
577 free_recovery_list(&(rec->r_list[type])); 577 free_recovery_list(&(rec->r_list[type]));
578 mlog_exit(status); 578 if (status)
579 mlog_errno(status);
579 return status; 580 return status;
580} 581}
581 582
@@ -600,7 +601,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
600 for (type = 0; type < MAXQUOTAS; type++) { 601 for (type = 0; type < MAXQUOTAS; type++) {
601 if (list_empty(&(rec->r_list[type]))) 602 if (list_empty(&(rec->r_list[type])))
602 continue; 603 continue;
603 mlog(0, "Recovering quota in slot %d\n", slot_num); 604 trace_ocfs2_finish_quota_recovery(slot_num);
604 lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num); 605 lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
605 if (!lqinode) { 606 if (!lqinode) {
606 status = -ENOENT; 607 status = -ENOENT;
@@ -882,9 +883,10 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
882 dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes - 883 dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
883 od->dq_originodes); 884 od->dq_originodes);
884 spin_unlock(&dq_data_lock); 885 spin_unlock(&dq_data_lock);
885 mlog(0, "Writing local dquot %u space %lld inodes %lld\n", 886 trace_olq_set_dquot(
886 od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod), 887 (unsigned long long)le64_to_cpu(dqblk->dqb_spacemod),
887 (long long)le64_to_cpu(dqblk->dqb_inodemod)); 888 (unsigned long long)le64_to_cpu(dqblk->dqb_inodemod),
889 od->dq_dquot.dq_id);
888} 890}
889 891
890/* Write dquot to local quota file */ 892/* Write dquot to local quota file */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index c384d634872a..5d32749c896d 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -16,7 +16,6 @@
16 */ 16 */
17 17
18#include <linux/sort.h> 18#include <linux/sort.h>
19#define MLOG_MASK_PREFIX ML_REFCOUNT
20#include <cluster/masklog.h> 19#include <cluster/masklog.h>
21#include "ocfs2.h" 20#include "ocfs2.h"
22#include "inode.h" 21#include "inode.h"
@@ -34,6 +33,7 @@
34#include "aops.h" 33#include "aops.h"
35#include "xattr.h" 34#include "xattr.h"
36#include "namei.h" 35#include "namei.h"
36#include "ocfs2_trace.h"
37 37
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/blkdev.h> 39#include <linux/blkdev.h>
@@ -84,8 +84,7 @@ static int ocfs2_validate_refcount_block(struct super_block *sb,
84 struct ocfs2_refcount_block *rb = 84 struct ocfs2_refcount_block *rb =
85 (struct ocfs2_refcount_block *)bh->b_data; 85 (struct ocfs2_refcount_block *)bh->b_data;
86 86
87 mlog(0, "Validating refcount block %llu\n", 87 trace_ocfs2_validate_refcount_block((unsigned long long)bh->b_blocknr);
88 (unsigned long long)bh->b_blocknr);
89 88
90 BUG_ON(!buffer_uptodate(bh)); 89 BUG_ON(!buffer_uptodate(bh));
91 90
@@ -545,8 +544,8 @@ void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
545 while ((node = rb_last(root)) != NULL) { 544 while ((node = rb_last(root)) != NULL) {
546 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node); 545 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
547 546
548 mlog(0, "Purge tree %llu\n", 547 trace_ocfs2_purge_refcount_trees(
549 (unsigned long long) tree->rf_blkno); 548 (unsigned long long) tree->rf_blkno);
550 549
551 rb_erase(&tree->rf_node, root); 550 rb_erase(&tree->rf_node, root);
552 ocfs2_free_refcount_tree(tree); 551 ocfs2_free_refcount_tree(tree);
@@ -575,7 +574,8 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
575 574
576 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 575 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
577 576
578 mlog(0, "create tree for inode %lu\n", inode->i_ino); 577 trace_ocfs2_create_refcount_tree(
578 (unsigned long long)OCFS2_I(inode)->ip_blkno);
579 579
580 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); 580 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
581 if (ret) { 581 if (ret) {
@@ -646,8 +646,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
646 di->i_refcount_loc = cpu_to_le64(first_blkno); 646 di->i_refcount_loc = cpu_to_le64(first_blkno);
647 spin_unlock(&oi->ip_lock); 647 spin_unlock(&oi->ip_lock);
648 648
649 mlog(0, "created tree for inode %lu, refblock %llu\n", 649 trace_ocfs2_create_refcount_tree_blkno((unsigned long long)first_blkno);
650 inode->i_ino, (unsigned long long)first_blkno);
651 650
652 ocfs2_journal_dirty(handle, di_bh); 651 ocfs2_journal_dirty(handle, di_bh);
653 652
@@ -1256,8 +1255,9 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
1256 goto out; 1255 goto out;
1257 } 1256 }
1258 1257
1259 mlog(0, "change index %d, old count %u, change %d\n", index, 1258 trace_ocfs2_change_refcount_rec(
1260 le32_to_cpu(rec->r_refcount), change); 1259 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1260 index, le32_to_cpu(rec->r_refcount), change);
1261 le32_add_cpu(&rec->r_refcount, change); 1261 le32_add_cpu(&rec->r_refcount, change);
1262 1262
1263 if (!rec->r_refcount) { 1263 if (!rec->r_refcount) {
@@ -1353,8 +1353,8 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1353 1353
1354 ocfs2_journal_dirty(handle, ref_root_bh); 1354 ocfs2_journal_dirty(handle, ref_root_bh);
1355 1355
1356 mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno, 1356 trace_ocfs2_expand_inline_ref_root((unsigned long long)blkno,
1357 le16_to_cpu(new_rb->rf_records.rl_used)); 1357 le16_to_cpu(new_rb->rf_records.rl_used));
1358 1358
1359 *ref_leaf_bh = new_bh; 1359 *ref_leaf_bh = new_bh;
1360 new_bh = NULL; 1360 new_bh = NULL;
@@ -1466,9 +1466,9 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
1466 (struct ocfs2_refcount_block *)new_bh->b_data; 1466 (struct ocfs2_refcount_block *)new_bh->b_data;
1467 struct ocfs2_refcount_list *new_rl = &new_rb->rf_records; 1467 struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;
1468 1468
1469 mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n", 1469 trace_ocfs2_divide_leaf_refcount_block(
1470 (unsigned long long)ref_leaf_bh->b_blocknr, 1470 (unsigned long long)ref_leaf_bh->b_blocknr,
1471 le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used)); 1471 le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
1472 1472
1473 /* 1473 /*
1474 * XXX: Improvement later. 1474 * XXX: Improvement later.
@@ -1601,8 +1601,8 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1601 1601
1602 ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh); 1602 ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);
1603 1603
1604 mlog(0, "insert new leaf block %llu at %u\n", 1604 trace_ocfs2_new_leaf_refcount_block(
1605 (unsigned long long)new_bh->b_blocknr, new_cpos); 1605 (unsigned long long)new_bh->b_blocknr, new_cpos);
1606 1606
1607 /* Insert the new leaf block with the specific offset cpos. */ 1607 /* Insert the new leaf block with the specific offset cpos. */
1608 ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr, 1608 ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
@@ -1794,11 +1794,10 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
1794 (le16_to_cpu(rf_list->rl_used) - index) * 1794 (le16_to_cpu(rf_list->rl_used) - index) *
1795 sizeof(struct ocfs2_refcount_rec)); 1795 sizeof(struct ocfs2_refcount_rec));
1796 1796
1797 mlog(0, "insert refcount record start %llu, len %u, count %u " 1797 trace_ocfs2_insert_refcount_rec(
1798 "to leaf block %llu at index %d\n", 1798 (unsigned long long)ref_leaf_bh->b_blocknr, index,
1799 (unsigned long long)le64_to_cpu(rec->r_cpos), 1799 (unsigned long long)le64_to_cpu(rec->r_cpos),
1800 le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount), 1800 le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount));
1801 (unsigned long long)ref_leaf_bh->b_blocknr, index);
1802 1801
1803 rf_list->rl_recs[index] = *rec; 1802 rf_list->rl_recs[index] = *rec;
1804 1803
@@ -1850,10 +1849,12 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1850 1849
1851 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); 1850 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
1852 1851
1853 mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n", 1852 trace_ocfs2_split_refcount_rec(le64_to_cpu(orig_rec->r_cpos),
1854 le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters), 1853 le32_to_cpu(orig_rec->r_clusters),
1855 le64_to_cpu(split_rec->r_cpos), 1854 le32_to_cpu(orig_rec->r_refcount),
1856 le32_to_cpu(split_rec->r_clusters)); 1855 le64_to_cpu(split_rec->r_cpos),
1856 le32_to_cpu(split_rec->r_clusters),
1857 le32_to_cpu(split_rec->r_refcount));
1857 1858
1858 /* 1859 /*
1859 * If we just need to split the header or tail clusters, 1860 * If we just need to split the header or tail clusters,
@@ -1967,12 +1968,11 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1967 1968
1968 if (split_rec->r_refcount) { 1969 if (split_rec->r_refcount) {
1969 rf_list->rl_recs[index] = *split_rec; 1970 rf_list->rl_recs[index] = *split_rec;
1970 mlog(0, "insert refcount record start %llu, len %u, count %u " 1971 trace_ocfs2_split_refcount_rec_insert(
1971 "to leaf block %llu at index %d\n", 1972 (unsigned long long)ref_leaf_bh->b_blocknr, index,
1972 (unsigned long long)le64_to_cpu(split_rec->r_cpos), 1973 (unsigned long long)le64_to_cpu(split_rec->r_cpos),
1973 le32_to_cpu(split_rec->r_clusters), 1974 le32_to_cpu(split_rec->r_clusters),
1974 le32_to_cpu(split_rec->r_refcount), 1975 le32_to_cpu(split_rec->r_refcount));
1975 (unsigned long long)ref_leaf_bh->b_blocknr, index);
1976 1976
1977 if (merge) 1977 if (merge)
1978 ocfs2_refcount_rec_merge(rb, index); 1978 ocfs2_refcount_rec_merge(rb, index);
@@ -1997,7 +1997,7 @@ static int __ocfs2_increase_refcount(handle_t *handle,
1997 struct ocfs2_refcount_rec rec; 1997 struct ocfs2_refcount_rec rec;
1998 unsigned int set_len = 0; 1998 unsigned int set_len = 0;
1999 1999
2000 mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n", 2000 trace_ocfs2_increase_refcount_begin(
2001 (unsigned long long)ocfs2_metadata_cache_owner(ci), 2001 (unsigned long long)ocfs2_metadata_cache_owner(ci),
2002 (unsigned long long)cpos, len); 2002 (unsigned long long)cpos, len);
2003 2003
@@ -2024,9 +2024,9 @@ static int __ocfs2_increase_refcount(handle_t *handle,
2024 */ 2024 */
2025 if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos && 2025 if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
2026 set_len <= len) { 2026 set_len <= len) {
2027 mlog(0, "increase refcount rec, start %llu, len %u, " 2027 trace_ocfs2_increase_refcount_change(
2028 "count %u\n", (unsigned long long)cpos, set_len, 2028 (unsigned long long)cpos, set_len,
2029 le32_to_cpu(rec.r_refcount)); 2029 le32_to_cpu(rec.r_refcount));
2030 ret = ocfs2_change_refcount_rec(handle, ci, 2030 ret = ocfs2_change_refcount_rec(handle, ci,
2031 ref_leaf_bh, index, 2031 ref_leaf_bh, index,
2032 merge, 1); 2032 merge, 1);
@@ -2037,7 +2037,7 @@ static int __ocfs2_increase_refcount(handle_t *handle,
2037 } else if (!rec.r_refcount) { 2037 } else if (!rec.r_refcount) {
2038 rec.r_refcount = cpu_to_le32(1); 2038 rec.r_refcount = cpu_to_le32(1);
2039 2039
2040 mlog(0, "insert refcount rec, start %llu, len %u\n", 2040 trace_ocfs2_increase_refcount_insert(
2041 (unsigned long long)le64_to_cpu(rec.r_cpos), 2041 (unsigned long long)le64_to_cpu(rec.r_cpos),
2042 set_len); 2042 set_len);
2043 ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh, 2043 ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
@@ -2055,8 +2055,7 @@ static int __ocfs2_increase_refcount(handle_t *handle,
2055 rec.r_clusters = cpu_to_le32(set_len); 2055 rec.r_clusters = cpu_to_le32(set_len);
2056 le32_add_cpu(&rec.r_refcount, 1); 2056 le32_add_cpu(&rec.r_refcount, 1);
2057 2057
2058 mlog(0, "split refcount rec, start %llu, " 2058 trace_ocfs2_increase_refcount_split(
2059 "len %u, count %u\n",
2060 (unsigned long long)le64_to_cpu(rec.r_cpos), 2059 (unsigned long long)le64_to_cpu(rec.r_cpos),
2061 set_len, le32_to_cpu(rec.r_refcount)); 2060 set_len, le32_to_cpu(rec.r_refcount));
2062 ret = ocfs2_split_refcount_rec(handle, ci, 2061 ret = ocfs2_split_refcount_rec(handle, ci,
@@ -2095,6 +2094,11 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
2095 2094
2096 BUG_ON(rb->rf_records.rl_used); 2095 BUG_ON(rb->rf_records.rl_used);
2097 2096
2097 trace_ocfs2_remove_refcount_extent(
2098 (unsigned long long)ocfs2_metadata_cache_owner(ci),
2099 (unsigned long long)ref_leaf_bh->b_blocknr,
2100 le32_to_cpu(rb->rf_cpos));
2101
2098 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); 2102 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
2099 ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos), 2103 ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
2100 1, meta_ac, dealloc); 2104 1, meta_ac, dealloc);
@@ -2137,7 +2141,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
2137 if (!rb->rf_list.l_next_free_rec) { 2141 if (!rb->rf_list.l_next_free_rec) {
2138 BUG_ON(rb->rf_clusters); 2142 BUG_ON(rb->rf_clusters);
2139 2143
2140 mlog(0, "reset refcount tree root %llu to be a record block.\n", 2144 trace_ocfs2_restore_refcount_block(
2141 (unsigned long long)ref_root_bh->b_blocknr); 2145 (unsigned long long)ref_root_bh->b_blocknr);
2142 2146
2143 rb->rf_flags = 0; 2147 rb->rf_flags = 0;
@@ -2184,6 +2188,10 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
2184 BUG_ON(cpos + len > 2188 BUG_ON(cpos + len >
2185 le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters)); 2189 le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));
2186 2190
2191 trace_ocfs2_decrease_refcount_rec(
2192 (unsigned long long)ocfs2_metadata_cache_owner(ci),
2193 (unsigned long long)cpos, len);
2194
2187 if (cpos == le64_to_cpu(rec->r_cpos) && 2195 if (cpos == le64_to_cpu(rec->r_cpos) &&
2188 len == le32_to_cpu(rec->r_clusters)) 2196 len == le32_to_cpu(rec->r_clusters))
2189 ret = ocfs2_change_refcount_rec(handle, ci, 2197 ret = ocfs2_change_refcount_rec(handle, ci,
@@ -2195,12 +2203,6 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle,
2195 2203
2196 le32_add_cpu(&split.r_refcount, -1); 2204 le32_add_cpu(&split.r_refcount, -1);
2197 2205
2198 mlog(0, "split refcount rec, start %llu, "
2199 "len %u, count %u, original start %llu, len %u\n",
2200 (unsigned long long)le64_to_cpu(split.r_cpos),
2201 len, le32_to_cpu(split.r_refcount),
2202 (unsigned long long)le64_to_cpu(rec->r_cpos),
2203 le32_to_cpu(rec->r_clusters));
2204 ret = ocfs2_split_refcount_rec(handle, ci, 2206 ret = ocfs2_split_refcount_rec(handle, ci,
2205 ref_root_bh, ref_leaf_bh, 2207 ref_root_bh, ref_leaf_bh,
2206 &split, index, 1, 2208 &split, index, 1,
@@ -2239,10 +2241,9 @@ static int __ocfs2_decrease_refcount(handle_t *handle,
2239 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2241 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2240 struct buffer_head *ref_leaf_bh = NULL; 2242 struct buffer_head *ref_leaf_bh = NULL;
2241 2243
2242 mlog(0, "Tree owner %llu, decrease refcount start %llu, " 2244 trace_ocfs2_decrease_refcount(
2243 "len %u, delete %u\n", 2245 (unsigned long long)ocfs2_metadata_cache_owner(ci),
2244 (unsigned long long)ocfs2_metadata_cache_owner(ci), 2246 (unsigned long long)cpos, len, delete);
2245 (unsigned long long)cpos, len, delete);
2246 2247
2247 while (len) { 2248 while (len) {
2248 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 2249 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
@@ -2352,8 +2353,8 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode,
2352{ 2353{
2353 int ret; 2354 int ret;
2354 2355
2355 mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n", 2356 trace_ocfs2_mark_extent_refcounted(OCFS2_I(inode)->ip_blkno,
2356 inode->i_ino, cpos, len, phys); 2357 cpos, len, phys);
2357 2358
2358 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { 2359 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
2359 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " 2360 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
@@ -2392,8 +2393,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2392 struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL; 2393 struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
2393 u32 len; 2394 u32 len;
2394 2395
2395 mlog(0, "start_cpos %llu, clusters %u\n",
2396 (unsigned long long)start_cpos, clusters);
2397 while (clusters) { 2396 while (clusters) {
2398 ret = ocfs2_get_refcount_rec(ci, ref_root_bh, 2397 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
2399 cpos, clusters, &rec, 2398 cpos, clusters, &rec,
@@ -2427,12 +2426,11 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2427 2426
2428 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; 2427 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2429 2428
2430 mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu," 2429 trace_ocfs2_calc_refcount_meta_credits_iterate(
2431 "rec->r_clusters %u, rec->r_refcount %u, index %d\n", 2430 recs_add, (unsigned long long)cpos, clusters,
2432 recs_add, (unsigned long long)cpos, clusters, 2431 (unsigned long long)le64_to_cpu(rec.r_cpos),
2433 (unsigned long long)le64_to_cpu(rec.r_cpos), 2432 le32_to_cpu(rec.r_clusters),
2434 le32_to_cpu(rec.r_clusters), 2433 le32_to_cpu(rec.r_refcount), index);
2435 le32_to_cpu(rec.r_refcount), index);
2436 2434
2437 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + 2435 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
2438 le32_to_cpu(rec.r_clusters)) - cpos; 2436 le32_to_cpu(rec.r_clusters)) - cpos;
@@ -2488,7 +2486,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2488 if (!ref_blocks) 2486 if (!ref_blocks)
2489 goto out; 2487 goto out;
2490 2488
2491 mlog(0, "we need ref_blocks %d\n", ref_blocks);
2492 *meta_add += ref_blocks; 2489 *meta_add += ref_blocks;
2493 *credits += ref_blocks; 2490 *credits += ref_blocks;
2494 2491
@@ -2514,6 +2511,10 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2514 } 2511 }
2515 2512
2516out: 2513out:
2514
2515 trace_ocfs2_calc_refcount_meta_credits(
2516 (unsigned long long)start_cpos, clusters,
2517 *meta_add, *credits);
2517 brelse(ref_leaf_bh); 2518 brelse(ref_leaf_bh);
2518 brelse(prev_bh); 2519 brelse(prev_bh);
2519 return ret; 2520 return ret;
@@ -2578,8 +2579,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2578 goto out; 2579 goto out;
2579 } 2580 }
2580 2581
2581 mlog(0, "reserve new metadata %d blocks, credits = %d\n", 2582 trace_ocfs2_prepare_refcount_change_for_del(*ref_blocks, *credits);
2582 *ref_blocks, *credits);
2583 2583
2584out: 2584out:
2585 brelse(ref_root_bh); 2585 brelse(ref_root_bh);
@@ -2886,8 +2886,7 @@ static int ocfs2_lock_refcount_allocators(struct super_block *sb,
2886 goto out; 2886 goto out;
2887 } 2887 }
2888 2888
2889 mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n", 2889 trace_ocfs2_lock_refcount_allocators(meta_add, *credits);
2890 meta_add, num_clusters, *credits);
2891 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add, 2890 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
2892 meta_ac); 2891 meta_ac);
2893 if (ret) { 2892 if (ret) {
@@ -2937,8 +2936,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2937 loff_t offset, end, map_end; 2936 loff_t offset, end, map_end;
2938 struct address_space *mapping = context->inode->i_mapping; 2937 struct address_space *mapping = context->inode->i_mapping;
2939 2938
2940 mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, 2939 trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
2941 new_cluster, new_len, cpos); 2940 new_cluster, new_len);
2942 2941
2943 readahead_pages = 2942 readahead_pages =
2944 (ocfs2_cow_contig_clusters(sb) << 2943 (ocfs2_cow_contig_clusters(sb) <<
@@ -3031,8 +3030,8 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3031 struct buffer_head *old_bh = NULL; 3030 struct buffer_head *old_bh = NULL;
3032 struct buffer_head *new_bh = NULL; 3031 struct buffer_head *new_bh = NULL;
3033 3032
3034 mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster, 3033 trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
3035 new_cluster, new_len); 3034 new_cluster, new_len);
3036 3035
3037 for (i = 0; i < blocks; i++, old_block++, new_block++) { 3036 for (i = 0; i < blocks; i++, old_block++, new_block++) {
3038 new_bh = sb_getblk(osb->sb, new_block); 3037 new_bh = sb_getblk(osb->sb, new_block);
@@ -3085,8 +3084,8 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
3085 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); 3084 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
3086 u64 ino = ocfs2_metadata_cache_owner(et->et_ci); 3085 u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
3087 3086
3088 mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n", 3087 trace_ocfs2_clear_ext_refcount((unsigned long long)ino,
3089 (unsigned long long)ino, cpos, len, p_cluster, ext_flags); 3088 cpos, len, p_cluster, ext_flags);
3090 3089
3091 memset(&replace_rec, 0, sizeof(replace_rec)); 3090 memset(&replace_rec, 0, sizeof(replace_rec));
3092 replace_rec.e_cpos = cpu_to_le32(cpos); 3091 replace_rec.e_cpos = cpu_to_le32(cpos);
@@ -3141,8 +3140,8 @@ static int ocfs2_replace_clusters(handle_t *handle,
3141 struct ocfs2_caching_info *ci = context->data_et.et_ci; 3140 struct ocfs2_caching_info *ci = context->data_et.et_ci;
3142 u64 ino = ocfs2_metadata_cache_owner(ci); 3141 u64 ino = ocfs2_metadata_cache_owner(ci);
3143 3142
3144 mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n", 3143 trace_ocfs2_replace_clusters((unsigned long long)ino,
3145 (unsigned long long)ino, cpos, old, new, len, ext_flags); 3144 cpos, old, new, len, ext_flags);
3146 3145
3147 /*If the old clusters is unwritten, no need to duplicate. */ 3146 /*If the old clusters is unwritten, no need to duplicate. */
3148 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { 3147 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
@@ -3236,8 +3235,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3236 struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci; 3235 struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
3237 struct ocfs2_refcount_rec rec; 3236 struct ocfs2_refcount_rec rec;
3238 3237
3239 mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n", 3238 trace_ocfs2_make_clusters_writable(cpos, p_cluster,
3240 cpos, p_cluster, num_clusters, e_flags); 3239 num_clusters, e_flags);
3241 3240
3242 ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters, 3241 ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
3243 &context->data_et, 3242 &context->data_et,
@@ -3475,9 +3474,9 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
3475 goto out; 3474 goto out;
3476 } 3475 }
3477 3476
3478 mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, " 3477 trace_ocfs2_refcount_cow_hunk(OCFS2_I(inode)->ip_blkno,
3479 "cow_len %u\n", inode->i_ino, 3478 cpos, write_len, max_cpos,
3480 cpos, write_len, cow_start, cow_len); 3479 cow_start, cow_len);
3481 3480
3482 BUG_ON(cow_len == 0); 3481 BUG_ON(cow_len == 0);
3483 3482
@@ -3756,8 +3755,7 @@ int ocfs2_add_refcount_flag(struct inode *inode,
3756 goto out; 3755 goto out;
3757 } 3756 }
3758 3757
3759 mlog(0, "reserve new metadata %d, credits = %d\n", 3758 trace_ocfs2_add_refcount_flag(ref_blocks, credits);
3760 ref_blocks, credits);
3761 3759
3762 if (ref_blocks) { 3760 if (ref_blocks) {
3763 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), 3761 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 3e78db361bc7..41ffd36c689c 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -30,10 +30,10 @@
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31#include <linux/list.h> 31#include <linux/list.h>
32 32
33#define MLOG_MASK_PREFIX ML_RESERVATIONS
34#include <cluster/masklog.h> 33#include <cluster/masklog.h>
35 34
36#include "ocfs2.h" 35#include "ocfs2.h"
36#include "ocfs2_trace.h"
37 37
38#ifdef CONFIG_OCFS2_DEBUG_FS 38#ifdef CONFIG_OCFS2_DEBUG_FS
39#define OCFS2_CHECK_RESERVATIONS 39#define OCFS2_CHECK_RESERVATIONS
@@ -321,8 +321,7 @@ static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
321 321
322 assert_spin_locked(&resv_lock); 322 assert_spin_locked(&resv_lock);
323 323
324 mlog(0, "Insert reservation start: %u len: %u\n", new->r_start, 324 trace_ocfs2_resv_insert(new->r_start, new->r_len);
325 new->r_len);
326 325
327 while (*p) { 326 while (*p) {
328 parent = *p; 327 parent = *p;
@@ -423,8 +422,8 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
423 unsigned int best_start, best_len = 0; 422 unsigned int best_start, best_len = 0;
424 int offset, start, found; 423 int offset, start, found;
425 424
426 mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n", 425 trace_ocfs2_resmap_find_free_bits_begin(search_start, search_len,
427 wanted, search_start, search_len, resmap->m_bitmap_len); 426 wanted, resmap->m_bitmap_len);
428 427
429 found = best_start = best_len = 0; 428 found = best_start = best_len = 0;
430 429
@@ -463,7 +462,7 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
463 *rlen = best_len; 462 *rlen = best_len;
464 *rstart = best_start; 463 *rstart = best_start;
465 464
466 mlog(0, "Found start: %u len: %u\n", best_start, best_len); 465 trace_ocfs2_resmap_find_free_bits_end(best_start, best_len);
467 466
468 return *rlen; 467 return *rlen;
469} 468}
@@ -487,9 +486,8 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
487 * - our window should be last in all reservations 486 * - our window should be last in all reservations
488 * - need to make sure we don't go past end of bitmap 487 * - need to make sure we don't go past end of bitmap
489 */ 488 */
490 489 trace_ocfs2_resv_find_window_begin(resv->r_start, ocfs2_resv_end(resv),
491 mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n", 490 goal, wanted, RB_EMPTY_ROOT(root));
492 resv->r_start, ocfs2_resv_end(resv), goal, wanted);
493 491
494 assert_spin_locked(&resv_lock); 492 assert_spin_locked(&resv_lock);
495 493
@@ -498,9 +496,6 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
498 * Easiest case - empty tree. We can just take 496 * Easiest case - empty tree. We can just take
499 * whatever window of free bits we want. 497 * whatever window of free bits we want.
500 */ 498 */
501
502 mlog(0, "Empty root\n");
503
504 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal, 499 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
505 resmap->m_bitmap_len - goal, 500 resmap->m_bitmap_len - goal,
506 &cstart, &clen); 501 &cstart, &clen);
@@ -524,8 +519,6 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
524 prev_resv = ocfs2_find_resv_lhs(resmap, goal); 519 prev_resv = ocfs2_find_resv_lhs(resmap, goal);
525 520
526 if (prev_resv == NULL) { 521 if (prev_resv == NULL) {
527 mlog(0, "Goal on LHS of leftmost window\n");
528
529 /* 522 /*
530 * A NULL here means that the search code couldn't 523 * A NULL here means that the search code couldn't
531 * find a window that starts before goal. 524 * find a window that starts before goal.
@@ -570,13 +563,15 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
570 next_resv = NULL; 563 next_resv = NULL;
571 } 564 }
572 565
566 trace_ocfs2_resv_find_window_prev(prev_resv->r_start,
567 ocfs2_resv_end(prev_resv));
568
573 prev = &prev_resv->r_node; 569 prev = &prev_resv->r_node;
574 570
575 /* Now we do a linear search for a window, starting at 'prev_rsv' */ 571 /* Now we do a linear search for a window, starting at 'prev_rsv' */
576 while (1) { 572 while (1) {
577 next = rb_next(prev); 573 next = rb_next(prev);
578 if (next) { 574 if (next) {
579 mlog(0, "One more resv found in linear search\n");
580 next_resv = rb_entry(next, 575 next_resv = rb_entry(next,
581 struct ocfs2_alloc_reservation, 576 struct ocfs2_alloc_reservation,
582 r_node); 577 r_node);
@@ -585,7 +580,6 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
585 gap_end = next_resv->r_start - 1; 580 gap_end = next_resv->r_start - 1;
586 gap_len = gap_end - gap_start + 1; 581 gap_len = gap_end - gap_start + 1;
587 } else { 582 } else {
588 mlog(0, "No next node\n");
589 /* 583 /*
590 * We're at the rightmost edge of the 584 * We're at the rightmost edge of the
591 * tree. See if a reservation between this 585 * tree. See if a reservation between this
@@ -596,6 +590,8 @@ static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
596 gap_end = resmap->m_bitmap_len - 1; 590 gap_end = resmap->m_bitmap_len - 1;
597 } 591 }
598 592
593 trace_ocfs2_resv_find_window_next(next ? next_resv->r_start: -1,
594 next ? ocfs2_resv_end(next_resv) : -1);
599 /* 595 /*
600 * No need to check this gap if we have already found 596 * No need to check this gap if we have already found
601 * a larger region of free bits. 597 * a larger region of free bits.
@@ -654,8 +650,9 @@ static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
654 lru_resv = list_first_entry(&resmap->m_lru, 650 lru_resv = list_first_entry(&resmap->m_lru,
655 struct ocfs2_alloc_reservation, r_lru); 651 struct ocfs2_alloc_reservation, r_lru);
656 652
657 mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start, 653 trace_ocfs2_cannibalize_resv_begin(lru_resv->r_start,
658 lru_resv->r_len, ocfs2_resv_end(lru_resv)); 654 lru_resv->r_len,
655 ocfs2_resv_end(lru_resv));
659 656
660 /* 657 /*
661 * Cannibalize (some or all) of the target reservation and 658 * Cannibalize (some or all) of the target reservation and
@@ -684,10 +681,9 @@ static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
684 resv->r_len = shrink; 681 resv->r_len = shrink;
685 } 682 }
686 683
687 mlog(0, "Reservation now looks like: r_start: %u r_end: %u " 684 trace_ocfs2_cannibalize_resv_end(resv->r_start, ocfs2_resv_end(resv),
688 "r_len: %u r_last_start: %u r_last_len: %u\n", 685 resv->r_len, resv->r_last_start,
689 resv->r_start, ocfs2_resv_end(resv), resv->r_len, 686 resv->r_last_len);
690 resv->r_last_start, resv->r_last_len);
691 687
692 ocfs2_resv_insert(resmap, resv); 688 ocfs2_resv_insert(resmap, resv);
693} 689}
@@ -748,7 +744,6 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
748 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) 744 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
749 wanted = *clen; 745 wanted = *clen;
750 746
751 mlog(0, "empty reservation, find new window\n");
752 /* 747 /*
753 * Try to get a window here. If it works, we must fall 748 * Try to get a window here. If it works, we must fall
754 * through and test the bitmap . This avoids some 749 * through and test the bitmap . This avoids some
@@ -757,6 +752,7 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
757 * that inode. 752 * that inode.
758 */ 753 */
759 ocfs2_resv_find_window(resmap, resv, wanted); 754 ocfs2_resv_find_window(resmap, resv, wanted);
755 trace_ocfs2_resmap_resv_bits(resv->r_start, resv->r_len);
760 } 756 }
761 757
762 BUG_ON(ocfs2_resv_empty(resv)); 758 BUG_ON(ocfs2_resv_empty(resv));
@@ -813,10 +809,10 @@ void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
813 809
814 spin_lock(&resv_lock); 810 spin_lock(&resv_lock);
815 811
816 mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u " 812 trace_ocfs2_resmap_claimed_bits_begin(cstart, cend, clen, resv->r_start,
817 "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n", 813 ocfs2_resv_end(resv), resv->r_len,
818 cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv), 814 resv->r_last_start,
819 resv->r_len, resv->r_last_start, resv->r_last_len); 815 resv->r_last_len);
820 816
821 BUG_ON(cstart < resv->r_start); 817 BUG_ON(cstart < resv->r_start);
822 BUG_ON(cstart > ocfs2_resv_end(resv)); 818 BUG_ON(cstart > ocfs2_resv_end(resv));
@@ -833,10 +829,9 @@ void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
833 if (!ocfs2_resv_empty(resv)) 829 if (!ocfs2_resv_empty(resv))
834 ocfs2_resv_mark_lru(resmap, resv); 830 ocfs2_resv_mark_lru(resmap, resv);
835 831
836 mlog(0, "Reservation now looks like: r_start: %u r_end: %u " 832 trace_ocfs2_resmap_claimed_bits_end(resv->r_start, ocfs2_resv_end(resv),
837 "r_len: %u r_last_start: %u r_last_len: %u\n", 833 resv->r_len, resv->r_last_start,
838 resv->r_start, ocfs2_resv_end(resv), resv->r_len, 834 resv->r_last_len);
839 resv->r_last_start, resv->r_last_len);
840 835
841 ocfs2_check_resmap(resmap); 836 ocfs2_check_resmap(resmap);
842 837
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index dacd553d8617..ec55add7604a 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -27,7 +27,6 @@
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/types.h> 28#include <linux/types.h>
29 29
30#define MLOG_MASK_PREFIX ML_DISK_ALLOC
31#include <cluster/masklog.h> 30#include <cluster/masklog.h>
32 31
33#include "ocfs2.h" 32#include "ocfs2.h"
@@ -39,6 +38,7 @@
39#include "super.h" 38#include "super.h"
40#include "sysfile.h" 39#include "sysfile.h"
41#include "uptodate.h" 40#include "uptodate.h"
41#include "ocfs2_trace.h"
42 42
43#include "buffer_head_io.h" 43#include "buffer_head_io.h"
44#include "suballoc.h" 44#include "suballoc.h"
@@ -82,7 +82,6 @@ static u16 ocfs2_calc_new_backup_super(struct inode *inode,
82 backups++; 82 backups++;
83 } 83 }
84 84
85 mlog_exit_void();
86 return backups; 85 return backups;
87} 86}
88 87
@@ -103,8 +102,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
103 u16 cl_bpc = le16_to_cpu(cl->cl_bpc); 102 u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
104 u16 cl_cpg = le16_to_cpu(cl->cl_cpg); 103 u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
105 104
106 mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", 105 trace_ocfs2_update_last_group_and_inode(new_clusters,
107 new_clusters, first_new_cluster); 106 first_new_cluster);
108 107
109 ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode), 108 ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
110 group_bh, OCFS2_JOURNAL_ACCESS_WRITE); 109 group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -176,7 +175,8 @@ out_rollback:
176 le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); 175 le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
177 } 176 }
178out: 177out:
179 mlog_exit(ret); 178 if (ret)
179 mlog_errno(ret);
180 return ret; 180 return ret;
181} 181}
182 182
@@ -281,8 +281,6 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
281 u32 first_new_cluster; 281 u32 first_new_cluster;
282 u64 lgd_blkno; 282 u64 lgd_blkno;
283 283
284 mlog_entry_void();
285
286 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) 284 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
287 return -EROFS; 285 return -EROFS;
288 286
@@ -342,7 +340,8 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
342 goto out_unlock; 340 goto out_unlock;
343 } 341 }
344 342
345 mlog(0, "extend the last group at %llu, new clusters = %d\n", 343
344 trace_ocfs2_group_extend(
346 (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters); 345 (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters);
347 346
348 handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); 347 handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS);
@@ -377,7 +376,6 @@ out_mutex:
377 iput(main_bm_inode); 376 iput(main_bm_inode);
378 377
379out: 378out:
380 mlog_exit_void();
381 return ret; 379 return ret;
382} 380}
383 381
@@ -472,8 +470,6 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
472 struct ocfs2_chain_rec *cr; 470 struct ocfs2_chain_rec *cr;
473 u16 cl_bpc; 471 u16 cl_bpc;
474 472
475 mlog_entry_void();
476
477 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) 473 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
478 return -EROFS; 474 return -EROFS;
479 475
@@ -520,8 +516,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
520 goto out_unlock; 516 goto out_unlock;
521 } 517 }
522 518
523 mlog(0, "Add a new group %llu in chain = %u, length = %u\n", 519 trace_ocfs2_group_add((unsigned long long)input->group,
524 (unsigned long long)input->group, input->chain, input->clusters); 520 input->chain, input->clusters, input->frees);
525 521
526 handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS); 522 handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS);
527 if (IS_ERR(handle)) { 523 if (IS_ERR(handle)) {
@@ -589,6 +585,5 @@ out_mutex:
589 iput(main_bm_inode); 585 iput(main_bm_inode);
590 586
591out: 587out:
592 mlog_exit_void();
593 return ret; 588 return ret;
594} 589}
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index ab4e0172cc1d..26fc0014d509 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -27,7 +27,6 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29 29
30#define MLOG_MASK_PREFIX ML_SUPER
31#include <cluster/masklog.h> 30#include <cluster/masklog.h>
32 31
33#include "ocfs2.h" 32#include "ocfs2.h"
@@ -39,6 +38,7 @@
39#include "slot_map.h" 38#include "slot_map.h"
40#include "super.h" 39#include "super.h"
41#include "sysfile.h" 40#include "sysfile.h"
41#include "ocfs2_trace.h"
42 42
43#include "buffer_head_io.h" 43#include "buffer_head_io.h"
44 44
@@ -142,8 +142,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
142 BUG_ON(si->si_blocks == 0); 142 BUG_ON(si->si_blocks == 0);
143 BUG_ON(si->si_bh == NULL); 143 BUG_ON(si->si_bh == NULL);
144 144
145 mlog(0, "Refreshing slot map, reading %u block(s)\n", 145 trace_ocfs2_refresh_slot_info(si->si_blocks);
146 si->si_blocks);
147 146
148 /* 147 /*
149 * We pass -1 as blocknr because we expect all of si->si_bh to 148 * We pass -1 as blocknr because we expect all of si->si_bh to
@@ -381,8 +380,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
381 /* The size checks above should ensure this */ 380 /* The size checks above should ensure this */
382 BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks); 381 BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
383 382
384 mlog(0, "Slot map needs %u buffers for %llu bytes\n", 383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
385 si->si_blocks, bytes);
386 384
387 si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, 385 si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
388 GFP_KERNEL); 386 GFP_KERNEL);
@@ -400,8 +398,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
400 goto bail; 398 goto bail;
401 } 399 }
402 400
403 mlog(0, "Reading slot map block %u at %llu\n", i, 401 trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
404 (unsigned long long)blkno);
405 402
406 bh = NULL; /* Acquire a fresh bh */ 403 bh = NULL; /* Acquire a fresh bh */
407 status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno, 404 status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
@@ -475,8 +472,6 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
475 int slot; 472 int slot;
476 struct ocfs2_slot_info *si; 473 struct ocfs2_slot_info *si;
477 474
478 mlog_entry_void();
479
480 si = osb->slot_info; 475 si = osb->slot_info;
481 476
482 spin_lock(&osb->osb_lock); 477 spin_lock(&osb->osb_lock);
@@ -505,14 +500,13 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
505 osb->slot_num = slot; 500 osb->slot_num = slot;
506 spin_unlock(&osb->osb_lock); 501 spin_unlock(&osb->osb_lock);
507 502
508 mlog(0, "taking node slot %d\n", osb->slot_num); 503 trace_ocfs2_find_slot(osb->slot_num);
509 504
510 status = ocfs2_update_disk_slot(osb, si, osb->slot_num); 505 status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
511 if (status < 0) 506 if (status < 0)
512 mlog_errno(status); 507 mlog_errno(status);
513 508
514bail: 509bail:
515 mlog_exit(status);
516 return status; 510 return status;
517} 511}
518 512
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 71998d4d61d5..ab6e2061074f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -29,7 +29,6 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/highmem.h> 30#include <linux/highmem.h>
31 31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h> 32#include <cluster/masklog.h>
34 33
35#include "ocfs2.h" 34#include "ocfs2.h"
@@ -44,6 +43,7 @@
44#include "super.h" 43#include "super.h"
45#include "sysfile.h" 44#include "sysfile.h"
46#include "uptodate.h" 45#include "uptodate.h"
46#include "ocfs2_trace.h"
47 47
48#include "buffer_head_io.h" 48#include "buffer_head_io.h"
49 49
@@ -308,8 +308,8 @@ static int ocfs2_validate_group_descriptor(struct super_block *sb,
308 int rc; 308 int rc;
309 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; 309 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
310 310
311 mlog(0, "Validating group descriptor %llu\n", 311 trace_ocfs2_validate_group_descriptor(
312 (unsigned long long)bh->b_blocknr); 312 (unsigned long long)bh->b_blocknr);
313 313
314 BUG_ON(!buffer_uptodate(bh)); 314 BUG_ON(!buffer_uptodate(bh));
315 315
@@ -389,8 +389,6 @@ static int ocfs2_block_group_fill(handle_t *handle,
389 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 389 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
390 struct super_block * sb = alloc_inode->i_sb; 390 struct super_block * sb = alloc_inode->i_sb;
391 391
392 mlog_entry_void();
393
394 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { 392 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
395 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " 393 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
396 "b_blocknr (%llu)", 394 "b_blocknr (%llu)",
@@ -436,7 +434,8 @@ static int ocfs2_block_group_fill(handle_t *handle,
436 * allocation time. */ 434 * allocation time. */
437 435
438bail: 436bail:
439 mlog_exit(status); 437 if (status)
438 mlog_errno(status);
440 return status; 439 return status;
441} 440}
442 441
@@ -477,8 +476,8 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
477 476
478 /* setup the group */ 477 /* setup the group */
479 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 478 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
480 mlog(0, "new descriptor, record %u, at block %llu\n", 479 trace_ocfs2_block_group_alloc_contig(
481 alloc_rec, (unsigned long long)bg_blkno); 480 (unsigned long long)bg_blkno, alloc_rec);
482 481
483 bg_bh = sb_getblk(osb->sb, bg_blkno); 482 bg_bh = sb_getblk(osb->sb, bg_blkno);
484 if (!bg_bh) { 483 if (!bg_bh) {
@@ -657,8 +656,8 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
657 656
658 /* setup the group */ 657 /* setup the group */
659 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 658 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
660 mlog(0, "new descriptor, record %u, at block %llu\n", 659 trace_ocfs2_block_group_alloc_discontig(
661 alloc_rec, (unsigned long long)bg_blkno); 660 (unsigned long long)bg_blkno, alloc_rec);
662 661
663 bg_bh = sb_getblk(osb->sb, bg_blkno); 662 bg_bh = sb_getblk(osb->sb, bg_blkno);
664 if (!bg_bh) { 663 if (!bg_bh) {
@@ -707,8 +706,6 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
707 706
708 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); 707 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
709 708
710 mlog_entry_void();
711
712 cl = &fe->id2.i_chain; 709 cl = &fe->id2.i_chain;
713 status = ocfs2_reserve_clusters_with_limit(osb, 710 status = ocfs2_reserve_clusters_with_limit(osb,
714 le16_to_cpu(cl->cl_cpg), 711 le16_to_cpu(cl->cl_cpg),
@@ -730,8 +727,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
730 } 727 }
731 728
732 if (last_alloc_group && *last_alloc_group != 0) { 729 if (last_alloc_group && *last_alloc_group != 0) {
733 mlog(0, "use old allocation group %llu for block group alloc\n", 730 trace_ocfs2_block_group_alloc(
734 (unsigned long long)*last_alloc_group); 731 (unsigned long long)*last_alloc_group);
735 ac->ac_last_group = *last_alloc_group; 732 ac->ac_last_group = *last_alloc_group;
736 } 733 }
737 734
@@ -796,7 +793,8 @@ bail:
796 793
797 brelse(bg_bh); 794 brelse(bg_bh);
798 795
799 mlog_exit(status); 796 if (status)
797 mlog_errno(status);
800 return status; 798 return status;
801} 799}
802 800
@@ -814,8 +812,6 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
814 struct ocfs2_dinode *fe; 812 struct ocfs2_dinode *fe;
815 u32 free_bits; 813 u32 free_bits;
816 814
817 mlog_entry_void();
818
819 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); 815 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
820 if (!alloc_inode) { 816 if (!alloc_inode) {
821 mlog_errno(-EINVAL); 817 mlog_errno(-EINVAL);
@@ -855,16 +851,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
855 if (bits_wanted > free_bits) { 851 if (bits_wanted > free_bits) {
856 /* cluster bitmap never grows */ 852 /* cluster bitmap never grows */
857 if (ocfs2_is_cluster_bitmap(alloc_inode)) { 853 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
858 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", 854 trace_ocfs2_reserve_suballoc_bits_nospc(bits_wanted,
859 bits_wanted, free_bits); 855 free_bits);
860 status = -ENOSPC; 856 status = -ENOSPC;
861 goto bail; 857 goto bail;
862 } 858 }
863 859
864 if (!(flags & ALLOC_NEW_GROUP)) { 860 if (!(flags & ALLOC_NEW_GROUP)) {
865 mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " 861 trace_ocfs2_reserve_suballoc_bits_no_new_group(
866 "and we don't alloc a new group for it.\n", 862 slot, bits_wanted, free_bits);
867 slot, bits_wanted, free_bits);
868 status = -ENOSPC; 863 status = -ENOSPC;
869 goto bail; 864 goto bail;
870 } 865 }
@@ -890,7 +885,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
890bail: 885bail:
891 brelse(bh); 886 brelse(bh);
892 887
893 mlog_exit(status); 888 if (status)
889 mlog_errno(status);
894 return status; 890 return status;
895} 891}
896 892
@@ -1052,7 +1048,8 @@ bail:
1052 *ac = NULL; 1048 *ac = NULL;
1053 } 1049 }
1054 1050
1055 mlog_exit(status); 1051 if (status)
1052 mlog_errno(status);
1056 return status; 1053 return status;
1057} 1054}
1058 1055
@@ -1119,8 +1116,8 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
1119 spin_lock(&osb->osb_lock); 1116 spin_lock(&osb->osb_lock);
1120 osb->osb_inode_alloc_group = alloc_group; 1117 osb->osb_inode_alloc_group = alloc_group;
1121 spin_unlock(&osb->osb_lock); 1118 spin_unlock(&osb->osb_lock);
1122 mlog(0, "after reservation, new allocation group is " 1119 trace_ocfs2_reserve_new_inode_new_group(
1123 "%llu\n", (unsigned long long)alloc_group); 1120 (unsigned long long)alloc_group);
1124 1121
1125 /* 1122 /*
1126 * Some inodes must be freed by us, so try to allocate 1123 * Some inodes must be freed by us, so try to allocate
@@ -1152,7 +1149,8 @@ bail:
1152 *ac = NULL; 1149 *ac = NULL;
1153 } 1150 }
1154 1151
1155 mlog_exit(status); 1152 if (status)
1153 mlog_errno(status);
1156 return status; 1154 return status;
1157} 1155}
1158 1156
@@ -1189,8 +1187,6 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
1189{ 1187{
1190 int status; 1188 int status;
1191 1189
1192 mlog_entry_void();
1193
1194 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 1190 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
1195 if (!(*ac)) { 1191 if (!(*ac)) {
1196 status = -ENOMEM; 1192 status = -ENOMEM;
@@ -1229,7 +1225,8 @@ bail:
1229 *ac = NULL; 1225 *ac = NULL;
1230 } 1226 }
1231 1227
1232 mlog_exit(status); 1228 if (status)
1229 mlog_errno(status);
1233 return status; 1230 return status;
1234} 1231}
1235 1232
@@ -1357,15 +1354,12 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1357 void *bitmap = bg->bg_bitmap; 1354 void *bitmap = bg->bg_bitmap;
1358 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1355 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1359 1356
1360 mlog_entry_void();
1361
1362 /* All callers get the descriptor via 1357 /* All callers get the descriptor via
1363 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ 1358 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
1364 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); 1359 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1365 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); 1360 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
1366 1361
1367 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, 1362 trace_ocfs2_block_group_set_bits(bit_off, num_bits);
1368 num_bits);
1369 1363
1370 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1364 if (ocfs2_is_cluster_bitmap(alloc_inode))
1371 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1365 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
@@ -1394,7 +1388,8 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1394 ocfs2_journal_dirty(handle, group_bh); 1388 ocfs2_journal_dirty(handle, group_bh);
1395 1389
1396bail: 1390bail:
1397 mlog_exit(status); 1391 if (status)
1392 mlog_errno(status);
1398 return status; 1393 return status;
1399} 1394}
1400 1395
@@ -1437,10 +1432,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
1437 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); 1432 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1438 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); 1433 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
1439 1434
1440 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", 1435 trace_ocfs2_relink_block_group(
1441 (unsigned long long)le64_to_cpu(fe->i_blkno), chain, 1436 (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
1442 (unsigned long long)le64_to_cpu(bg->bg_blkno), 1437 (unsigned long long)le64_to_cpu(bg->bg_blkno),
1443 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); 1438 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
1444 1439
1445 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); 1440 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
1446 bg_ptr = le64_to_cpu(bg->bg_next_group); 1441 bg_ptr = le64_to_cpu(bg->bg_next_group);
@@ -1484,7 +1479,8 @@ out_rollback:
1484 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); 1479 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
1485 } 1480 }
1486 1481
1487 mlog_exit(status); 1482 if (status)
1483 mlog_errno(status);
1488 return status; 1484 return status;
1489} 1485}
1490 1486
@@ -1525,10 +1521,10 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1525 if ((gd_cluster_off + max_bits) > 1521 if ((gd_cluster_off + max_bits) >
1526 OCFS2_I(inode)->ip_clusters) { 1522 OCFS2_I(inode)->ip_clusters) {
1527 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; 1523 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
1528 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", 1524 trace_ocfs2_cluster_group_search_wrong_max_bits(
1529 (unsigned long long)le64_to_cpu(gd->bg_blkno), 1525 (unsigned long long)le64_to_cpu(gd->bg_blkno),
1530 le16_to_cpu(gd->bg_bits), 1526 le16_to_cpu(gd->bg_bits),
1531 OCFS2_I(inode)->ip_clusters, max_bits); 1527 OCFS2_I(inode)->ip_clusters, max_bits);
1532 } 1528 }
1533 1529
1534 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1530 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
@@ -1542,9 +1538,9 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1542 gd_cluster_off + 1538 gd_cluster_off +
1543 res->sr_bit_offset + 1539 res->sr_bit_offset +
1544 res->sr_bits); 1540 res->sr_bits);
1545 mlog(0, "Checking %llu against %llu\n", 1541 trace_ocfs2_cluster_group_search_max_block(
1546 (unsigned long long)blkoff, 1542 (unsigned long long)blkoff,
1547 (unsigned long long)max_block); 1543 (unsigned long long)max_block);
1548 if (blkoff > max_block) 1544 if (blkoff > max_block)
1549 return -ENOSPC; 1545 return -ENOSPC;
1550 } 1546 }
@@ -1588,9 +1584,9 @@ static int ocfs2_block_group_search(struct inode *inode,
1588 if (!ret && max_block) { 1584 if (!ret && max_block) {
1589 blkoff = le64_to_cpu(bg->bg_blkno) + 1585 blkoff = le64_to_cpu(bg->bg_blkno) +
1590 res->sr_bit_offset + res->sr_bits; 1586 res->sr_bit_offset + res->sr_bits;
1591 mlog(0, "Checking %llu against %llu\n", 1587 trace_ocfs2_block_group_search_max_block(
1592 (unsigned long long)blkoff, 1588 (unsigned long long)blkoff,
1593 (unsigned long long)max_block); 1589 (unsigned long long)max_block);
1594 if (blkoff > max_block) 1590 if (blkoff > max_block)
1595 ret = -ENOSPC; 1591 ret = -ENOSPC;
1596 } 1592 }
@@ -1756,9 +1752,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1756 struct ocfs2_group_desc *bg; 1752 struct ocfs2_group_desc *bg;
1757 1753
1758 chain = ac->ac_chain; 1754 chain = ac->ac_chain;
1759 mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", 1755 trace_ocfs2_search_chain_begin(
1760 bits_wanted, chain, 1756 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
1761 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 1757 bits_wanted, chain);
1762 1758
1763 status = ocfs2_read_group_descriptor(alloc_inode, fe, 1759 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1764 le64_to_cpu(cl->cl_recs[chain].c_blkno), 1760 le64_to_cpu(cl->cl_recs[chain].c_blkno),
@@ -1799,8 +1795,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1799 goto bail; 1795 goto bail;
1800 } 1796 }
1801 1797
1802 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1798 trace_ocfs2_search_chain_succ(
1803 res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); 1799 (unsigned long long)le64_to_cpu(bg->bg_blkno), res->sr_bits);
1804 1800
1805 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno); 1801 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
1806 1802
@@ -1861,8 +1857,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1861 goto bail; 1857 goto bail;
1862 } 1858 }
1863 1859
1864 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, 1860 trace_ocfs2_search_chain_end(
1865 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1861 (unsigned long long)le64_to_cpu(fe->i_blkno),
1862 res->sr_bits);
1866 1863
1867out_loc_only: 1864out_loc_only:
1868 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1865 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
@@ -1870,7 +1867,8 @@ bail:
1870 brelse(group_bh); 1867 brelse(group_bh);
1871 brelse(prev_group_bh); 1868 brelse(prev_group_bh);
1872 1869
1873 mlog_exit(status); 1870 if (status)
1871 mlog_errno(status);
1874 return status; 1872 return status;
1875} 1873}
1876 1874
@@ -1888,8 +1886,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1888 struct ocfs2_chain_list *cl; 1886 struct ocfs2_chain_list *cl;
1889 struct ocfs2_dinode *fe; 1887 struct ocfs2_dinode *fe;
1890 1888
1891 mlog_entry_void();
1892
1893 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1889 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1894 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); 1890 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1895 BUG_ON(!ac->ac_bh); 1891 BUG_ON(!ac->ac_bh);
@@ -1945,8 +1941,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1945 goto bail; 1941 goto bail;
1946 } 1942 }
1947 1943
1948 mlog(0, "Search of victim chain %u came up with nothing, " 1944 trace_ocfs2_claim_suballoc_bits(victim);
1949 "trying all chains now.\n", victim);
1950 1945
1951 /* If we didn't pick a good victim, then just default to 1946 /* If we didn't pick a good victim, then just default to
1952 * searching each chain in order. Don't allow chain relinking 1947 * searching each chain in order. Don't allow chain relinking
@@ -1984,7 +1979,8 @@ set_hint:
1984 } 1979 }
1985 1980
1986bail: 1981bail:
1987 mlog_exit(status); 1982 if (status)
1983 mlog_errno(status);
1988 return status; 1984 return status;
1989} 1985}
1990 1986
@@ -2021,7 +2017,8 @@ int ocfs2_claim_metadata(handle_t *handle,
2021 *num_bits = res.sr_bits; 2017 *num_bits = res.sr_bits;
2022 status = 0; 2018 status = 0;
2023bail: 2019bail:
2024 mlog_exit(status); 2020 if (status)
2021 mlog_errno(status);
2025 return status; 2022 return status;
2026} 2023}
2027 2024
@@ -2172,8 +2169,8 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle,
2172 goto out; 2169 goto out;
2173 } 2170 }
2174 2171
2175 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, 2172 trace_ocfs2_claim_new_inode_at_loc((unsigned long long)di_blkno,
2176 (unsigned long long)di_blkno); 2173 res->sr_bits);
2177 2174
2178 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); 2175 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
2179 2176
@@ -2201,8 +2198,6 @@ int ocfs2_claim_new_inode(handle_t *handle,
2201 int status; 2198 int status;
2202 struct ocfs2_suballoc_result res; 2199 struct ocfs2_suballoc_result res;
2203 2200
2204 mlog_entry_void();
2205
2206 BUG_ON(!ac); 2201 BUG_ON(!ac);
2207 BUG_ON(ac->ac_bits_given != 0); 2202 BUG_ON(ac->ac_bits_given != 0);
2208 BUG_ON(ac->ac_bits_wanted != 1); 2203 BUG_ON(ac->ac_bits_wanted != 1);
@@ -2230,7 +2225,8 @@ int ocfs2_claim_new_inode(handle_t *handle,
2230 ocfs2_save_inode_ac_group(dir, ac); 2225 ocfs2_save_inode_ac_group(dir, ac);
2231 status = 0; 2226 status = 0;
2232bail: 2227bail:
2233 mlog_exit(status); 2228 if (status)
2229 mlog_errno(status);
2234 return status; 2230 return status;
2235} 2231}
2236 2232
@@ -2307,8 +2303,6 @@ int __ocfs2_claim_clusters(handle_t *handle,
2307 struct ocfs2_suballoc_result res = { .sr_blkno = 0, }; 2303 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
2308 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb); 2304 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
2309 2305
2310 mlog_entry_void();
2311
2312 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 2306 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
2313 2307
2314 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL 2308 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -2363,7 +2357,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
2363 ac->ac_bits_given += *num_clusters; 2357 ac->ac_bits_given += *num_clusters;
2364 2358
2365bail: 2359bail:
2366 mlog_exit(status); 2360 if (status)
2361 mlog_errno(status);
2367 return status; 2362 return status;
2368} 2363}
2369 2364
@@ -2392,13 +2387,11 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
2392 unsigned int tmp; 2387 unsigned int tmp;
2393 struct ocfs2_group_desc *undo_bg = NULL; 2388 struct ocfs2_group_desc *undo_bg = NULL;
2394 2389
2395 mlog_entry_void();
2396
2397 /* The caller got this descriptor from 2390 /* The caller got this descriptor from
2398 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ 2391 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
2399 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); 2392 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
2400 2393
2401 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 2394 trace_ocfs2_block_group_clear_bits(bit_off, num_bits);
2402 2395
2403 BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); 2396 BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
2404 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), 2397 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
@@ -2463,8 +2456,6 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2463 struct buffer_head *group_bh = NULL; 2456 struct buffer_head *group_bh = NULL;
2464 struct ocfs2_group_desc *group; 2457 struct ocfs2_group_desc *group;
2465 2458
2466 mlog_entry_void();
2467
2468 /* The alloc_bh comes from ocfs2_free_dinode() or 2459 /* The alloc_bh comes from ocfs2_free_dinode() or
2469 * ocfs2_free_clusters(). The callers have all locked the 2460 * ocfs2_free_clusters(). The callers have all locked the
2470 * allocator and gotten alloc_bh from the lock call. This 2461 * allocator and gotten alloc_bh from the lock call. This
@@ -2473,9 +2464,10 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2473 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 2464 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
2474 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 2465 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
2475 2466
2476 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", 2467 trace_ocfs2_free_suballoc_bits(
2477 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 2468 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
2478 (unsigned long long)bg_blkno, start_bit); 2469 (unsigned long long)bg_blkno,
2470 start_bit, count);
2479 2471
2480 status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno, 2472 status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
2481 &group_bh); 2473 &group_bh);
@@ -2511,7 +2503,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2511bail: 2503bail:
2512 brelse(group_bh); 2504 brelse(group_bh);
2513 2505
2514 mlog_exit(status); 2506 if (status)
2507 mlog_errno(status);
2515 return status; 2508 return status;
2516} 2509}
2517 2510
@@ -2556,11 +2549,8 @@ static int _ocfs2_free_clusters(handle_t *handle,
2556 2549
2557 /* You can't ever have a contiguous set of clusters 2550 /* You can't ever have a contiguous set of clusters
2558 * bigger than a block group bitmap so we never have to worry 2551 * bigger than a block group bitmap so we never have to worry
2559 * about looping on them. */ 2552 * about looping on them.
2560 2553 * This is expensive. We can safely remove once this stuff has
2561 mlog_entry_void();
2562
2563 /* This is expensive. We can safely remove once this stuff has
2564 * gotten tested really well. */ 2554 * gotten tested really well. */
2565 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); 2555 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
2566 2556
@@ -2569,10 +2559,9 @@ static int _ocfs2_free_clusters(handle_t *handle,
2569 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, 2559 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
2570 &bg_start_bit); 2560 &bg_start_bit);
2571 2561
2572 mlog(0, "want to free %u clusters starting at block %llu\n", 2562 trace_ocfs2_free_clusters((unsigned long long)bg_blkno,
2573 num_clusters, (unsigned long long)start_blk); 2563 (unsigned long long)start_blk,
2574 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", 2564 bg_start_bit, num_clusters);
2575 (unsigned long long)bg_blkno, bg_start_bit);
2576 2565
2577 status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 2566 status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
2578 bg_start_bit, bg_blkno, 2567 bg_start_bit, bg_blkno,
@@ -2586,7 +2575,8 @@ static int _ocfs2_free_clusters(handle_t *handle,
2586 num_clusters); 2575 num_clusters);
2587 2576
2588out: 2577out:
2589 mlog_exit(status); 2578 if (status)
2579 mlog_errno(status);
2590 return status; 2580 return status;
2591} 2581}
2592 2582
@@ -2756,7 +2746,7 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2756 struct buffer_head *inode_bh = NULL; 2746 struct buffer_head *inode_bh = NULL;
2757 struct ocfs2_dinode *inode_fe; 2747 struct ocfs2_dinode *inode_fe;
2758 2748
2759 mlog_entry("blkno: %llu\n", (unsigned long long)blkno); 2749 trace_ocfs2_get_suballoc_slot_bit((unsigned long long)blkno);
2760 2750
2761 /* dirty read disk */ 2751 /* dirty read disk */
2762 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); 2752 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
@@ -2793,7 +2783,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2793bail: 2783bail:
2794 brelse(inode_bh); 2784 brelse(inode_bh);
2795 2785
2796 mlog_exit(status); 2786 if (status)
2787 mlog_errno(status);
2797 return status; 2788 return status;
2798} 2789}
2799 2790
@@ -2816,8 +2807,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2816 u64 bg_blkno; 2807 u64 bg_blkno;
2817 int status; 2808 int status;
2818 2809
2819 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, 2810 trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
2820 (unsigned int)bit); 2811 (unsigned int)bit);
2821 2812
2822 alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data; 2813 alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
2823 if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) { 2814 if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
@@ -2844,7 +2835,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2844bail: 2835bail:
2845 brelse(group_bh); 2836 brelse(group_bh);
2846 2837
2847 mlog_exit(status); 2838 if (status)
2839 mlog_errno(status);
2848 return status; 2840 return status;
2849} 2841}
2850 2842
@@ -2869,7 +2861,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2869 struct inode *inode_alloc_inode; 2861 struct inode *inode_alloc_inode;
2870 struct buffer_head *alloc_bh = NULL; 2862 struct buffer_head *alloc_bh = NULL;
2871 2863
2872 mlog_entry("blkno: %llu", (unsigned long long)blkno); 2864 trace_ocfs2_test_inode_bit((unsigned long long)blkno);
2873 2865
2874 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, 2866 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2875 &group_blkno, &suballoc_bit); 2867 &group_blkno, &suballoc_bit);
@@ -2910,6 +2902,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2910 iput(inode_alloc_inode); 2902 iput(inode_alloc_inode);
2911 brelse(alloc_bh); 2903 brelse(alloc_bh);
2912bail: 2904bail:
2913 mlog_exit(status); 2905 if (status)
2906 mlog_errno(status);
2914 return status; 2907 return status;
2915} 2908}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 236ed1bdca2c..69fa11b35aa4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -42,7 +42,9 @@
42#include <linux/seq_file.h> 42#include <linux/seq_file.h>
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44 44
45#define MLOG_MASK_PREFIX ML_SUPER 45#define CREATE_TRACE_POINTS
46#include "ocfs2_trace.h"
47
46#include <cluster/masklog.h> 48#include <cluster/masklog.h>
47 49
48#include "ocfs2.h" 50#include "ocfs2.h"
@@ -441,8 +443,6 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
441 int status = 0; 443 int status = 0;
442 int i; 444 int i;
443 445
444 mlog_entry_void();
445
446 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); 446 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
447 if (IS_ERR(new)) { 447 if (IS_ERR(new)) {
448 status = PTR_ERR(new); 448 status = PTR_ERR(new);
@@ -478,7 +478,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
478 } 478 }
479 479
480bail: 480bail:
481 mlog_exit(status); 481 if (status)
482 mlog_errno(status);
482 return status; 483 return status;
483} 484}
484 485
@@ -488,8 +489,6 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
488 int status = 0; 489 int status = 0;
489 int i; 490 int i;
490 491
491 mlog_entry_void();
492
493 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; 492 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
494 i < NUM_SYSTEM_INODES; 493 i < NUM_SYSTEM_INODES;
495 i++) { 494 i++) {
@@ -508,7 +507,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
508 } 507 }
509 508
510bail: 509bail:
511 mlog_exit(status); 510 if (status)
511 mlog_errno(status);
512 return status; 512 return status;
513} 513}
514 514
@@ -517,8 +517,6 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
517 int i; 517 int i;
518 struct inode *inode; 518 struct inode *inode;
519 519
520 mlog_entry_void();
521
522 for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { 520 for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) {
523 inode = osb->global_system_inodes[i]; 521 inode = osb->global_system_inodes[i];
524 if (inode) { 522 if (inode) {
@@ -540,7 +538,7 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
540 } 538 }
541 539
542 if (!osb->local_system_inodes) 540 if (!osb->local_system_inodes)
543 goto out; 541 return;
544 542
545 for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { 543 for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) {
546 if (osb->local_system_inodes[i]) { 544 if (osb->local_system_inodes[i]) {
@@ -551,9 +549,6 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
551 549
552 kfree(osb->local_system_inodes); 550 kfree(osb->local_system_inodes);
553 osb->local_system_inodes = NULL; 551 osb->local_system_inodes = NULL;
554
555out:
556 mlog_exit(0);
557} 552}
558 553
559/* We're allocating fs objects, use GFP_NOFS */ 554/* We're allocating fs objects, use GFP_NOFS */
@@ -684,12 +679,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
684 } 679 }
685 680
686 if (*flags & MS_RDONLY) { 681 if (*flags & MS_RDONLY) {
687 mlog(0, "Going to ro mode.\n");
688 sb->s_flags |= MS_RDONLY; 682 sb->s_flags |= MS_RDONLY;
689 osb->osb_flags |= OCFS2_OSB_SOFT_RO; 683 osb->osb_flags |= OCFS2_OSB_SOFT_RO;
690 } else { 684 } else {
691 mlog(0, "Making ro filesystem writeable.\n");
692
693 if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { 685 if (osb->osb_flags & OCFS2_OSB_ERROR_FS) {
694 mlog(ML_ERROR, "Cannot remount RDWR " 686 mlog(ML_ERROR, "Cannot remount RDWR "
695 "filesystem due to previous errors.\n"); 687 "filesystem due to previous errors.\n");
@@ -707,6 +699,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
707 sb->s_flags &= ~MS_RDONLY; 699 sb->s_flags &= ~MS_RDONLY;
708 osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; 700 osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
709 } 701 }
702 trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);
710unlock_osb: 703unlock_osb:
711 spin_unlock(&osb->osb_lock); 704 spin_unlock(&osb->osb_lock);
712 /* Enable quota accounting after remounting RW */ 705 /* Enable quota accounting after remounting RW */
@@ -1032,7 +1025,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1032 char nodestr[8]; 1025 char nodestr[8];
1033 struct ocfs2_blockcheck_stats stats; 1026 struct ocfs2_blockcheck_stats stats;
1034 1027
1035 mlog_entry("%p, %p, %i", sb, data, silent); 1028 trace_ocfs2_fill_super(sb, data, silent);
1036 1029
1037 if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { 1030 if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
1038 status = -EINVAL; 1031 status = -EINVAL;
@@ -1208,7 +1201,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1208 mlog_errno(status); 1201 mlog_errno(status);
1209 atomic_set(&osb->vol_state, VOLUME_DISABLED); 1202 atomic_set(&osb->vol_state, VOLUME_DISABLED);
1210 wake_up(&osb->osb_mount_event); 1203 wake_up(&osb->osb_mount_event);
1211 mlog_exit(status);
1212 return status; 1204 return status;
1213 } 1205 }
1214 } 1206 }
@@ -1222,7 +1214,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1222 /* Start this when the mount is almost sure of being successful */ 1214 /* Start this when the mount is almost sure of being successful */
1223 ocfs2_orphan_scan_start(osb); 1215 ocfs2_orphan_scan_start(osb);
1224 1216
1225 mlog_exit(status);
1226 return status; 1217 return status;
1227 1218
1228read_super_error: 1219read_super_error:
@@ -1237,7 +1228,8 @@ read_super_error:
1237 ocfs2_dismount_volume(sb, 1); 1228 ocfs2_dismount_volume(sb, 1);
1238 } 1229 }
1239 1230
1240 mlog_exit(status); 1231 if (status)
1232 mlog_errno(status);
1241 return status; 1233 return status;
1242} 1234}
1243 1235
@@ -1320,8 +1312,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1320 char *p; 1312 char *p;
1321 u32 tmp; 1313 u32 tmp;
1322 1314
1323 mlog_entry("remount: %d, options: \"%s\"\n", is_remount, 1315 trace_ocfs2_parse_options(is_remount, options ? options : "(none)");
1324 options ? options : "(none)");
1325 1316
1326 mopt->commit_interval = 0; 1317 mopt->commit_interval = 0;
1327 mopt->mount_opt = OCFS2_MOUNT_NOINTR; 1318 mopt->mount_opt = OCFS2_MOUNT_NOINTR;
@@ -1538,7 +1529,6 @@ static int ocfs2_parse_options(struct super_block *sb,
1538 status = 1; 1529 status = 1;
1539 1530
1540bail: 1531bail:
1541 mlog_exit(status);
1542 return status; 1532 return status;
1543} 1533}
1544 1534
@@ -1629,8 +1619,6 @@ static int __init ocfs2_init(void)
1629{ 1619{
1630 int status; 1620 int status;
1631 1621
1632 mlog_entry_void();
1633
1634 ocfs2_print_version(); 1622 ocfs2_print_version();
1635 1623
1636 status = init_ocfs2_uptodate_cache(); 1624 status = init_ocfs2_uptodate_cache();
@@ -1664,10 +1652,9 @@ leave:
1664 if (status < 0) { 1652 if (status < 0) {
1665 ocfs2_free_mem_caches(); 1653 ocfs2_free_mem_caches();
1666 exit_ocfs2_uptodate_cache(); 1654 exit_ocfs2_uptodate_cache();
1655 mlog_errno(status);
1667 } 1656 }
1668 1657
1669 mlog_exit(status);
1670
1671 if (status >= 0) { 1658 if (status >= 0) {
1672 return register_filesystem(&ocfs2_fs_type); 1659 return register_filesystem(&ocfs2_fs_type);
1673 } else 1660 } else
@@ -1676,8 +1663,6 @@ leave:
1676 1663
1677static void __exit ocfs2_exit(void) 1664static void __exit ocfs2_exit(void)
1678{ 1665{
1679 mlog_entry_void();
1680
1681 if (ocfs2_wq) { 1666 if (ocfs2_wq) {
1682 flush_workqueue(ocfs2_wq); 1667 flush_workqueue(ocfs2_wq);
1683 destroy_workqueue(ocfs2_wq); 1668 destroy_workqueue(ocfs2_wq);
@@ -1692,18 +1677,14 @@ static void __exit ocfs2_exit(void)
1692 unregister_filesystem(&ocfs2_fs_type); 1677 unregister_filesystem(&ocfs2_fs_type);
1693 1678
1694 exit_ocfs2_uptodate_cache(); 1679 exit_ocfs2_uptodate_cache();
1695
1696 mlog_exit_void();
1697} 1680}
1698 1681
1699static void ocfs2_put_super(struct super_block *sb) 1682static void ocfs2_put_super(struct super_block *sb)
1700{ 1683{
1701 mlog_entry("(0x%p)\n", sb); 1684 trace_ocfs2_put_super(sb);
1702 1685
1703 ocfs2_sync_blockdev(sb); 1686 ocfs2_sync_blockdev(sb);
1704 ocfs2_dismount_volume(sb, 0); 1687 ocfs2_dismount_volume(sb, 0);
1705
1706 mlog_exit_void();
1707} 1688}
1708 1689
1709static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) 1690static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1715,7 +1696,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
1715 struct buffer_head *bh = NULL; 1696 struct buffer_head *bh = NULL;
1716 struct inode *inode = NULL; 1697 struct inode *inode = NULL;
1717 1698
1718 mlog_entry("(%p, %p)\n", dentry->d_sb, buf); 1699 trace_ocfs2_statfs(dentry->d_sb, buf);
1719 1700
1720 osb = OCFS2_SB(dentry->d_sb); 1701 osb = OCFS2_SB(dentry->d_sb);
1721 1702
@@ -1762,7 +1743,8 @@ bail:
1762 if (inode) 1743 if (inode)
1763 iput(inode); 1744 iput(inode);
1764 1745
1765 mlog_exit(status); 1746 if (status)
1747 mlog_errno(status);
1766 1748
1767 return status; 1749 return status;
1768} 1750}
@@ -1882,8 +1864,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
1882 int unlock_super = 0; 1864 int unlock_super = 0;
1883 struct ocfs2_super *osb = OCFS2_SB(sb); 1865 struct ocfs2_super *osb = OCFS2_SB(sb);
1884 1866
1885 mlog_entry_void();
1886
1887 if (ocfs2_is_hard_readonly(osb)) 1867 if (ocfs2_is_hard_readonly(osb))
1888 goto leave; 1868 goto leave;
1889 1869
@@ -1928,7 +1908,6 @@ leave:
1928 if (unlock_super) 1908 if (unlock_super)
1929 ocfs2_super_unlock(osb, 1); 1909 ocfs2_super_unlock(osb, 1);
1930 1910
1931 mlog_exit(status);
1932 return status; 1911 return status;
1933} 1912}
1934 1913
@@ -1938,7 +1917,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1938 struct ocfs2_super *osb = NULL; 1917 struct ocfs2_super *osb = NULL;
1939 char nodestr[8]; 1918 char nodestr[8];
1940 1919
1941 mlog_entry("(0x%p)\n", sb); 1920 trace_ocfs2_dismount_volume(sb);
1942 1921
1943 BUG_ON(!sb); 1922 BUG_ON(!sb);
1944 osb = OCFS2_SB(sb); 1923 osb = OCFS2_SB(sb);
@@ -2090,8 +2069,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2090 struct ocfs2_super *osb; 2069 struct ocfs2_super *osb;
2091 u64 total_blocks; 2070 u64 total_blocks;
2092 2071
2093 mlog_entry_void();
2094
2095 osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL); 2072 osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL);
2096 if (!osb) { 2073 if (!osb) {
2097 status = -ENOMEM; 2074 status = -ENOMEM;
@@ -2155,7 +2132,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2155 status = -EINVAL; 2132 status = -EINVAL;
2156 goto bail; 2133 goto bail;
2157 } 2134 }
2158 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
2159 2135
2160 ocfs2_orphan_scan_init(osb); 2136 ocfs2_orphan_scan_init(osb);
2161 2137
@@ -2294,7 +2270,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2294 osb->s_clustersize_bits = 2270 osb->s_clustersize_bits =
2295 le32_to_cpu(di->id2.i_super.s_clustersize_bits); 2271 le32_to_cpu(di->id2.i_super.s_clustersize_bits);
2296 osb->s_clustersize = 1 << osb->s_clustersize_bits; 2272 osb->s_clustersize = 1 << osb->s_clustersize_bits;
2297 mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits);
2298 2273
2299 if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE || 2274 if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE ||
2300 osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) { 2275 osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) {
@@ -2333,11 +2308,10 @@ static int ocfs2_initialize_super(struct super_block *sb,
2333 le64_to_cpu(di->id2.i_super.s_first_cluster_group); 2308 le64_to_cpu(di->id2.i_super.s_first_cluster_group);
2334 osb->fs_generation = le32_to_cpu(di->i_fs_generation); 2309 osb->fs_generation = le32_to_cpu(di->i_fs_generation);
2335 osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); 2310 osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash);
2336 mlog(0, "vol_label: %s\n", osb->vol_label); 2311 trace_ocfs2_initialize_super(osb->vol_label, osb->uuid_str,
2337 mlog(0, "uuid: %s\n", osb->uuid_str); 2312 (unsigned long long)osb->root_blkno,
2338 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", 2313 (unsigned long long)osb->system_dir_blkno,
2339 (unsigned long long)osb->root_blkno, 2314 osb->s_clustersize_bits);
2340 (unsigned long long)osb->system_dir_blkno);
2341 2315
2342 osb->osb_dlm_debug = ocfs2_new_dlm_debug(); 2316 osb->osb_dlm_debug = ocfs2_new_dlm_debug();
2343 if (!osb->osb_dlm_debug) { 2317 if (!osb->osb_dlm_debug) {
@@ -2380,7 +2354,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2380 } 2354 }
2381 2355
2382bail: 2356bail:
2383 mlog_exit(status);
2384 return status; 2357 return status;
2385} 2358}
2386 2359
@@ -2396,8 +2369,6 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
2396{ 2369{
2397 int status = -EAGAIN; 2370 int status = -EAGAIN;
2398 2371
2399 mlog_entry_void();
2400
2401 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, 2372 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
2402 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { 2373 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
2403 /* We have to do a raw check of the feature here */ 2374 /* We have to do a raw check of the feature here */
@@ -2452,7 +2423,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
2452 } 2423 }
2453 2424
2454out: 2425out:
2455 mlog_exit(status); 2426 if (status && status != -EAGAIN)
2427 mlog_errno(status);
2456 return status; 2428 return status;
2457} 2429}
2458 2430
@@ -2465,8 +2437,6 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2465 * recover 2437 * recover
2466 * ourselves. */ 2438 * ourselves. */
2467 2439
2468 mlog_entry_void();
2469
2470 /* Init our journal object. */ 2440 /* Init our journal object. */
2471 status = ocfs2_journal_init(osb->journal, &dirty); 2441 status = ocfs2_journal_init(osb->journal, &dirty);
2472 if (status < 0) { 2442 if (status < 0) {
@@ -2516,8 +2486,6 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2516 * ourselves as mounted. */ 2486 * ourselves as mounted. */
2517 } 2487 }
2518 2488
2519 mlog(0, "Journal loaded.\n");
2520
2521 status = ocfs2_load_local_alloc(osb); 2489 status = ocfs2_load_local_alloc(osb);
2522 if (status < 0) { 2490 if (status < 0) {
2523 mlog_errno(status); 2491 mlog_errno(status);
@@ -2549,7 +2517,8 @@ finally:
2549 if (local_alloc) 2517 if (local_alloc)
2550 kfree(local_alloc); 2518 kfree(local_alloc);
2551 2519
2552 mlog_exit(status); 2520 if (status)
2521 mlog_errno(status);
2553 return status; 2522 return status;
2554} 2523}
2555 2524
@@ -2561,8 +2530,6 @@ finally:
2561 */ 2530 */
2562static void ocfs2_delete_osb(struct ocfs2_super *osb) 2531static void ocfs2_delete_osb(struct ocfs2_super *osb)
2563{ 2532{
2564 mlog_entry_void();
2565
2566 /* This function assumes that the caller has the main osb resource */ 2533 /* This function assumes that the caller has the main osb resource */
2567 2534
2568 ocfs2_free_slot_info(osb); 2535 ocfs2_free_slot_info(osb);
@@ -2580,8 +2547,6 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2580 kfree(osb->uuid_str); 2547 kfree(osb->uuid_str);
2581 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2548 ocfs2_put_dlm_debug(osb->osb_dlm_debug);
2582 memset(osb, 0, sizeof(struct ocfs2_super)); 2549 memset(osb, 0, sizeof(struct ocfs2_super));
2583
2584 mlog_exit_void();
2585} 2550}
2586 2551
2587/* Put OCFS2 into a readonly state, or (if the user specifies it), 2552/* Put OCFS2 into a readonly state, or (if the user specifies it),
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 9975457c981f..5d22872e2bb3 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -40,7 +40,6 @@
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/namei.h> 41#include <linux/namei.h>
42 42
43#define MLOG_MASK_PREFIX ML_NAMEI
44#include <cluster/masklog.h> 43#include <cluster/masklog.h>
45 44
46#include "ocfs2.h" 45#include "ocfs2.h"
@@ -62,8 +61,6 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
62 char *link = NULL; 61 char *link = NULL;
63 struct ocfs2_dinode *fe; 62 struct ocfs2_dinode *fe;
64 63
65 mlog_entry_void();
66
67 status = ocfs2_read_inode_block(inode, bh); 64 status = ocfs2_read_inode_block(inode, bh);
68 if (status < 0) { 65 if (status < 0) {
69 mlog_errno(status); 66 mlog_errno(status);
@@ -74,7 +71,6 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
74 fe = (struct ocfs2_dinode *) (*bh)->b_data; 71 fe = (struct ocfs2_dinode *) (*bh)->b_data;
75 link = (char *) fe->id2.i_symlink; 72 link = (char *) fe->id2.i_symlink;
76bail: 73bail:
77 mlog_exit(status);
78 74
79 return link; 75 return link;
80} 76}
@@ -88,8 +84,6 @@ static int ocfs2_readlink(struct dentry *dentry,
88 struct buffer_head *bh = NULL; 84 struct buffer_head *bh = NULL;
89 struct inode *inode = dentry->d_inode; 85 struct inode *inode = dentry->d_inode;
90 86
91 mlog_entry_void();
92
93 link = ocfs2_fast_symlink_getlink(inode, &bh); 87 link = ocfs2_fast_symlink_getlink(inode, &bh);
94 if (IS_ERR(link)) { 88 if (IS_ERR(link)) {
95 ret = PTR_ERR(link); 89 ret = PTR_ERR(link);
@@ -104,7 +98,8 @@ static int ocfs2_readlink(struct dentry *dentry,
104 98
105 brelse(bh); 99 brelse(bh);
106out: 100out:
107 mlog_exit(ret); 101 if (ret < 0)
102 mlog_errno(ret);
108 return ret; 103 return ret;
109} 104}
110 105
@@ -117,8 +112,6 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
117 struct inode *inode = dentry->d_inode; 112 struct inode *inode = dentry->d_inode;
118 struct buffer_head *bh = NULL; 113 struct buffer_head *bh = NULL;
119 114
120 mlog_entry_void();
121
122 BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); 115 BUG_ON(!ocfs2_inode_is_fast_symlink(inode));
123 target = ocfs2_fast_symlink_getlink(inode, &bh); 116 target = ocfs2_fast_symlink_getlink(inode, &bh);
124 if (IS_ERR(target)) { 117 if (IS_ERR(target)) {
@@ -142,7 +135,8 @@ bail:
142 nd_set_link(nd, status ? ERR_PTR(status) : link); 135 nd_set_link(nd, status ? ERR_PTR(status) : link);
143 brelse(bh); 136 brelse(bh);
144 137
145 mlog_exit(status); 138 if (status)
139 mlog_errno(status);
146 return NULL; 140 return NULL;
147} 141}
148 142
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 902efb23b6a6..3d635f4bbb20 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -27,7 +27,6 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29 29
30#define MLOG_MASK_PREFIX ML_INODE
31#include <cluster/masklog.h> 30#include <cluster/masklog.h>
32 31
33#include "ocfs2.h" 32#include "ocfs2.h"
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index a0a120e82b97..52eaf33d346f 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -54,14 +54,13 @@
54#include <linux/buffer_head.h> 54#include <linux/buffer_head.h>
55#include <linux/rbtree.h> 55#include <linux/rbtree.h>
56 56
57#define MLOG_MASK_PREFIX ML_UPTODATE
58
59#include <cluster/masklog.h> 57#include <cluster/masklog.h>
60 58
61#include "ocfs2.h" 59#include "ocfs2.h"
62 60
63#include "inode.h" 61#include "inode.h"
64#include "uptodate.h" 62#include "uptodate.h"
63#include "ocfs2_trace.h"
65 64
66struct ocfs2_meta_cache_item { 65struct ocfs2_meta_cache_item {
67 struct rb_node c_node; 66 struct rb_node c_node;
@@ -152,8 +151,8 @@ static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
152 while ((node = rb_last(root)) != NULL) { 151 while ((node = rb_last(root)) != NULL) {
153 item = rb_entry(node, struct ocfs2_meta_cache_item, c_node); 152 item = rb_entry(node, struct ocfs2_meta_cache_item, c_node);
154 153
155 mlog(0, "Purge item %llu\n", 154 trace_ocfs2_purge_copied_metadata_tree(
156 (unsigned long long) item->c_block); 155 (unsigned long long) item->c_block);
157 156
158 rb_erase(&item->c_node, root); 157 rb_erase(&item->c_node, root);
159 kmem_cache_free(ocfs2_uptodate_cachep, item); 158 kmem_cache_free(ocfs2_uptodate_cachep, item);
@@ -180,9 +179,9 @@ void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
180 tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE); 179 tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
181 to_purge = ci->ci_num_cached; 180 to_purge = ci->ci_num_cached;
182 181
183 mlog(0, "Purge %u %s items from Owner %llu\n", to_purge, 182 trace_ocfs2_metadata_cache_purge(
184 tree ? "array" : "tree", 183 (unsigned long long)ocfs2_metadata_cache_owner(ci),
185 (unsigned long long)ocfs2_metadata_cache_owner(ci)); 184 to_purge, tree);
186 185
187 /* If we're a tree, save off the root so that we can safely 186 /* If we're a tree, save off the root so that we can safely
188 * initialize the cache. We do the work to free tree members 187 * initialize the cache. We do the work to free tree members
@@ -249,10 +248,10 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
249 248
250 ocfs2_metadata_cache_lock(ci); 249 ocfs2_metadata_cache_lock(ci);
251 250
252 mlog(0, "Owner %llu, query block %llu (inline = %u)\n", 251 trace_ocfs2_buffer_cached_begin(
253 (unsigned long long)ocfs2_metadata_cache_owner(ci), 252 (unsigned long long)ocfs2_metadata_cache_owner(ci),
254 (unsigned long long) bh->b_blocknr, 253 (unsigned long long) bh->b_blocknr,
255 !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE)); 254 !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
256 255
257 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) 256 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
258 index = ocfs2_search_cache_array(ci, bh->b_blocknr); 257 index = ocfs2_search_cache_array(ci, bh->b_blocknr);
@@ -261,7 +260,7 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
261 260
262 ocfs2_metadata_cache_unlock(ci); 261 ocfs2_metadata_cache_unlock(ci);
263 262
264 mlog(0, "index = %d, item = %p\n", index, item); 263 trace_ocfs2_buffer_cached_end(index, item);
265 264
266 return (index != -1) || (item != NULL); 265 return (index != -1) || (item != NULL);
267} 266}
@@ -306,8 +305,9 @@ static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
306{ 305{
307 BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY); 306 BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY);
308 307
309 mlog(0, "block %llu takes position %u\n", (unsigned long long) block, 308 trace_ocfs2_append_cache_array(
310 ci->ci_num_cached); 309 (unsigned long long)ocfs2_metadata_cache_owner(ci),
310 (unsigned long long)block, ci->ci_num_cached);
311 311
312 ci->ci_cache.ci_array[ci->ci_num_cached] = block; 312 ci->ci_cache.ci_array[ci->ci_num_cached] = block;
313 ci->ci_num_cached++; 313 ci->ci_num_cached++;
@@ -324,8 +324,9 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
324 struct rb_node **p = &ci->ci_cache.ci_tree.rb_node; 324 struct rb_node **p = &ci->ci_cache.ci_tree.rb_node;
325 struct ocfs2_meta_cache_item *tmp; 325 struct ocfs2_meta_cache_item *tmp;
326 326
327 mlog(0, "Insert block %llu num = %u\n", (unsigned long long) block, 327 trace_ocfs2_insert_cache_tree(
328 ci->ci_num_cached); 328 (unsigned long long)ocfs2_metadata_cache_owner(ci),
329 (unsigned long long)block, ci->ci_num_cached);
329 330
330 while(*p) { 331 while(*p) {
331 parent = *p; 332 parent = *p;
@@ -389,9 +390,9 @@ static void ocfs2_expand_cache(struct ocfs2_caching_info *ci,
389 tree[i] = NULL; 390 tree[i] = NULL;
390 } 391 }
391 392
392 mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n", 393 trace_ocfs2_expand_cache(
393 (unsigned long long)ocfs2_metadata_cache_owner(ci), 394 (unsigned long long)ocfs2_metadata_cache_owner(ci),
394 ci->ci_flags, ci->ci_num_cached); 395 ci->ci_flags, ci->ci_num_cached);
395} 396}
396 397
397/* Slow path function - memory allocation is necessary. See the 398/* Slow path function - memory allocation is necessary. See the
@@ -405,9 +406,9 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
405 struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] = 406 struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
406 { NULL, }; 407 { NULL, };
407 408
408 mlog(0, "Owner %llu, block %llu, expand = %d\n", 409 trace_ocfs2_set_buffer_uptodate(
409 (unsigned long long)ocfs2_metadata_cache_owner(ci), 410 (unsigned long long)ocfs2_metadata_cache_owner(ci),
410 (unsigned long long)block, expand_tree); 411 (unsigned long long)block, expand_tree);
411 412
412 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); 413 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
413 if (!new) { 414 if (!new) {
@@ -433,7 +434,6 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
433 434
434 ocfs2_metadata_cache_lock(ci); 435 ocfs2_metadata_cache_lock(ci);
435 if (ocfs2_insert_can_use_array(ci)) { 436 if (ocfs2_insert_can_use_array(ci)) {
436 mlog(0, "Someone cleared the tree underneath us\n");
437 /* Ok, items were removed from the cache in between 437 /* Ok, items were removed from the cache in between
438 * locks. Detect this and revert back to the fast path */ 438 * locks. Detect this and revert back to the fast path */
439 ocfs2_append_cache_array(ci, block); 439 ocfs2_append_cache_array(ci, block);
@@ -490,9 +490,9 @@ void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
490 if (ocfs2_buffer_cached(ci, bh)) 490 if (ocfs2_buffer_cached(ci, bh))
491 return; 491 return;
492 492
493 mlog(0, "Owner %llu, inserting block %llu\n", 493 trace_ocfs2_set_buffer_uptodate_begin(
494 (unsigned long long)ocfs2_metadata_cache_owner(ci), 494 (unsigned long long)ocfs2_metadata_cache_owner(ci),
495 (unsigned long long)bh->b_blocknr); 495 (unsigned long long)bh->b_blocknr);
496 496
497 /* No need to recheck under spinlock - insertion is guarded by 497 /* No need to recheck under spinlock - insertion is guarded by
498 * co_io_lock() */ 498 * co_io_lock() */
@@ -542,8 +542,9 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
542 BUG_ON(index >= ci->ci_num_cached); 542 BUG_ON(index >= ci->ci_num_cached);
543 BUG_ON(!ci->ci_num_cached); 543 BUG_ON(!ci->ci_num_cached);
544 544
545 mlog(0, "remove index %d (num_cached = %u\n", index, 545 trace_ocfs2_remove_metadata_array(
546 ci->ci_num_cached); 546 (unsigned long long)ocfs2_metadata_cache_owner(ci),
547 index, ci->ci_num_cached);
547 548
548 ci->ci_num_cached--; 549 ci->ci_num_cached--;
549 550
@@ -559,8 +560,9 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
559static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci, 560static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
560 struct ocfs2_meta_cache_item *item) 561 struct ocfs2_meta_cache_item *item)
561{ 562{
562 mlog(0, "remove block %llu from tree\n", 563 trace_ocfs2_remove_metadata_tree(
563 (unsigned long long) item->c_block); 564 (unsigned long long)ocfs2_metadata_cache_owner(ci),
565 (unsigned long long)item->c_block);
564 566
565 rb_erase(&item->c_node, &ci->ci_cache.ci_tree); 567 rb_erase(&item->c_node, &ci->ci_cache.ci_tree);
566 ci->ci_num_cached--; 568 ci->ci_num_cached--;
@@ -573,10 +575,10 @@ static void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci,
573 struct ocfs2_meta_cache_item *item = NULL; 575 struct ocfs2_meta_cache_item *item = NULL;
574 576
575 ocfs2_metadata_cache_lock(ci); 577 ocfs2_metadata_cache_lock(ci);
576 mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n", 578 trace_ocfs2_remove_block_from_cache(
577 (unsigned long long)ocfs2_metadata_cache_owner(ci), 579 (unsigned long long)ocfs2_metadata_cache_owner(ci),
578 (unsigned long long) block, ci->ci_num_cached, 580 (unsigned long long) block, ci->ci_num_cached,
579 ci->ci_flags & OCFS2_CACHE_FL_INLINE); 581 ci->ci_flags);
580 582
581 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) { 583 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
582 index = ocfs2_search_cache_array(ci, block); 584 index = ocfs2_search_cache_array(ci, block);
@@ -626,9 +628,6 @@ int __init init_ocfs2_uptodate_cache(void)
626 if (!ocfs2_uptodate_cachep) 628 if (!ocfs2_uptodate_cachep)
627 return -ENOMEM; 629 return -ENOMEM;
628 630
629 mlog(0, "%u inlined cache items per inode.\n",
630 OCFS2_CACHE_INFO_MAX_ARRAY);
631
632 return 0; 631 return 0;
633} 632}
634 633
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6bb602486c6b..57a215dc2d9b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -37,7 +37,6 @@
37#include <linux/string.h> 37#include <linux/string.h>
38#include <linux/security.h> 38#include <linux/security.h>
39 39
40#define MLOG_MASK_PREFIX ML_XATTR
41#include <cluster/masklog.h> 40#include <cluster/masklog.h>
42 41
43#include "ocfs2.h" 42#include "ocfs2.h"
@@ -57,6 +56,7 @@
57#include "xattr.h" 56#include "xattr.h"
58#include "refcounttree.h" 57#include "refcounttree.h"
59#include "acl.h" 58#include "acl.h"
59#include "ocfs2_trace.h"
60 60
61struct ocfs2_xattr_def_value_root { 61struct ocfs2_xattr_def_value_root {
62 struct ocfs2_xattr_value_root xv; 62 struct ocfs2_xattr_value_root xv;
@@ -474,8 +474,7 @@ static int ocfs2_validate_xattr_block(struct super_block *sb,
474 struct ocfs2_xattr_block *xb = 474 struct ocfs2_xattr_block *xb =
475 (struct ocfs2_xattr_block *)bh->b_data; 475 (struct ocfs2_xattr_block *)bh->b_data;
476 476
477 mlog(0, "Validating xattr block %llu\n", 477 trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
478 (unsigned long long)bh->b_blocknr);
479 478
480 BUG_ON(!buffer_uptodate(bh)); 479 BUG_ON(!buffer_uptodate(bh));
481 480
@@ -715,11 +714,11 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 714 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
716 struct ocfs2_extent_tree et; 715 struct ocfs2_extent_tree et;
717 716
718 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
719
720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 717 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
721 718
722 while (clusters_to_add) { 719 while (clusters_to_add) {
720 trace_ocfs2_xattr_extend_allocation(clusters_to_add);
721
723 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
724 OCFS2_JOURNAL_ACCESS_WRITE); 723 OCFS2_JOURNAL_ACCESS_WRITE);
725 if (status < 0) { 724 if (status < 0) {
@@ -754,8 +753,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
754 */ 753 */
755 BUG_ON(why == RESTART_META); 754 BUG_ON(why == RESTART_META);
756 755
757 mlog(0, "restarting xattr value extension for %u"
758 " clusters,.\n", clusters_to_add);
759 credits = ocfs2_calc_extend_credits(inode->i_sb, 756 credits = ocfs2_calc_extend_credits(inode->i_sb,
760 &vb->vb_xv->xr_list, 757 &vb->vb_xv->xr_list,
761 clusters_to_add); 758 clusters_to_add);
@@ -3246,8 +3243,8 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3246 } 3243 }
3247 3244
3248 meta_add += extra_meta; 3245 meta_add += extra_meta;
3249 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " 3246 trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3250 "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits); 3247 clusters_add, *credits);
3251 3248
3252 if (meta_add) { 3249 if (meta_add) {
3253 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, 3250 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
@@ -3887,8 +3884,10 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
3887 3884
3888 if (found) { 3885 if (found) {
3889 xs->here = &xs->header->xh_entries[index]; 3886 xs->here = &xs->header->xh_entries[index];
3890 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, 3887 trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3891 (unsigned long long)bucket_blkno(xs->bucket), index); 3888 name, name_index, name_hash,
3889 (unsigned long long)bucket_blkno(xs->bucket),
3890 index);
3892 } else 3891 } else
3893 ret = -ENODATA; 3892 ret = -ENODATA;
3894 3893
@@ -3915,8 +3914,10 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
3915 if (le16_to_cpu(el->l_next_free_rec) == 0) 3914 if (le16_to_cpu(el->l_next_free_rec) == 0)
3916 return -ENODATA; 3915 return -ENODATA;
3917 3916
3918 mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n", 3917 trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3919 name, name_hash, name_index); 3918 name, name_index, name_hash,
3919 (unsigned long long)root_bh->b_blocknr,
3920 -1);
3920 3921
3921 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, 3922 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3922 &num_clusters, el); 3923 &num_clusters, el);
@@ -3927,9 +3928,10 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
3927 3928
3928 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 3929 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3929 3930
3930 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " 3931 trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3931 "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno, 3932 name, name_index, first_hash,
3932 first_hash); 3933 (unsigned long long)p_blkno,
3934 num_clusters);
3933 3935
3934 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 3936 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3935 p_blkno, first_hash, num_clusters, xs); 3937 p_blkno, first_hash, num_clusters, xs);
@@ -3955,8 +3957,9 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3955 return -ENOMEM; 3957 return -ENOMEM;
3956 } 3958 }
3957 3959
3958 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", 3960 trace_ocfs2_iterate_xattr_buckets(
3959 clusters, (unsigned long long)blkno); 3961 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3962 (unsigned long long)blkno, clusters);
3960 3963
3961 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { 3964 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3962 ret = ocfs2_read_xattr_bucket(bucket, blkno); 3965 ret = ocfs2_read_xattr_bucket(bucket, blkno);
@@ -3972,8 +3975,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3972 if (i == 0) 3975 if (i == 0)
3973 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); 3976 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3974 3977
3975 mlog(0, "iterating xattr bucket %llu, first hash %u\n", 3978 trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
3976 (unsigned long long)blkno,
3977 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); 3979 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3978 if (func) { 3980 if (func) {
3979 ret = func(inode, bucket, para); 3981 ret = func(inode, bucket, para);
@@ -4173,9 +4175,9 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4173 char *src = xb_bh->b_data; 4175 char *src = xb_bh->b_data;
4174 char *target = bucket_block(bucket, blks - 1); 4176 char *target = bucket_block(bucket, blks - 1);
4175 4177
4176 mlog(0, "cp xattr from block %llu to bucket %llu\n", 4178 trace_ocfs2_cp_xattr_block_to_bucket_begin(
4177 (unsigned long long)xb_bh->b_blocknr, 4179 (unsigned long long)xb_bh->b_blocknr,
4178 (unsigned long long)bucket_blkno(bucket)); 4180 (unsigned long long)bucket_blkno(bucket));
4179 4181
4180 for (i = 0; i < blks; i++) 4182 for (i = 0; i < blks; i++)
4181 memset(bucket_block(bucket, i), 0, blocksize); 4183 memset(bucket_block(bucket, i), 0, blocksize);
@@ -4211,8 +4213,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4211 for (i = 0; i < count; i++) 4213 for (i = 0; i < count; i++)
4212 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); 4214 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4213 4215
4214 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n", 4216 trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4215 offset, size, off_change);
4216 4217
4217 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), 4218 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4218 cmp_xe, swap_xe); 4219 cmp_xe, swap_xe);
@@ -4261,8 +4262,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4261 struct ocfs2_xattr_tree_root *xr; 4262 struct ocfs2_xattr_tree_root *xr;
4262 u16 xb_flags = le16_to_cpu(xb->xb_flags); 4263 u16 xb_flags = le16_to_cpu(xb->xb_flags);
4263 4264
4264 mlog(0, "create xattr index block for %llu\n", 4265 trace_ocfs2_xattr_create_index_block_begin(
4265 (unsigned long long)xb_bh->b_blocknr); 4266 (unsigned long long)xb_bh->b_blocknr);
4266 4267
4267 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 4268 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4268 BUG_ON(!xs->bucket); 4269 BUG_ON(!xs->bucket);
@@ -4295,8 +4296,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4295 */ 4296 */
4296 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 4297 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4297 4298
4298 mlog(0, "allocate 1 cluster from %llu to xattr block\n", 4299 trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4299 (unsigned long long)blkno);
4300 4300
4301 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); 4301 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4302 if (ret) { 4302 if (ret) {
@@ -4400,8 +4400,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4400 entries = (char *)xh->xh_entries; 4400 entries = (char *)xh->xh_entries;
4401 xh_free_start = le16_to_cpu(xh->xh_free_start); 4401 xh_free_start = le16_to_cpu(xh->xh_free_start);
4402 4402
4403 mlog(0, "adjust xattr bucket in %llu, count = %u, " 4403 trace_ocfs2_defrag_xattr_bucket(
4404 "xh_free_start = %u, xh_name_value_len = %u.\n",
4405 (unsigned long long)blkno, le16_to_cpu(xh->xh_count), 4404 (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4406 xh_free_start, le16_to_cpu(xh->xh_name_value_len)); 4405 xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4407 4406
@@ -4503,8 +4502,9 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4503 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets); 4502 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4504 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize); 4503 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4505 4504
4506 mlog(0, "move half of xattrs in cluster %llu to %llu\n", 4505 trace_ocfs2_mv_xattr_bucket_cross_cluster(
4507 (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno); 4506 (unsigned long long)last_cluster_blkno,
4507 (unsigned long long)new_blkno);
4508 4508
4509 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first), 4509 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4510 last_cluster_blkno, new_blkno, 4510 last_cluster_blkno, new_blkno,
@@ -4614,8 +4614,8 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
4614 struct ocfs2_xattr_entry *xe; 4614 struct ocfs2_xattr_entry *xe;
4615 int blocksize = inode->i_sb->s_blocksize; 4615 int blocksize = inode->i_sb->s_blocksize;
4616 4616
4617 mlog(0, "move some of xattrs from bucket %llu to %llu\n", 4617 trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
4618 (unsigned long long)blk, (unsigned long long)new_blk); 4618 (unsigned long long)new_blk);
4619 4619
4620 s_bucket = ocfs2_xattr_bucket_new(inode); 4620 s_bucket = ocfs2_xattr_bucket_new(inode);
4621 t_bucket = ocfs2_xattr_bucket_new(inode); 4621 t_bucket = ocfs2_xattr_bucket_new(inode);
@@ -4714,9 +4714,9 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
4714 */ 4714 */
4715 xe = &xh->xh_entries[start]; 4715 xe = &xh->xh_entries[start];
4716 len = sizeof(struct ocfs2_xattr_entry) * (count - start); 4716 len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4717 mlog(0, "mv xattr entry len %d from %d to %d\n", len, 4717 trace_ocfs2_divide_xattr_bucket_move(len,
4718 (int)((char *)xe - (char *)xh), 4718 (int)((char *)xe - (char *)xh),
4719 (int)((char *)xh->xh_entries - (char *)xh)); 4719 (int)((char *)xh->xh_entries - (char *)xh));
4720 memmove((char *)xh->xh_entries, (char *)xe, len); 4720 memmove((char *)xh->xh_entries, (char *)xe, len);
4721 xe = &xh->xh_entries[count - start]; 4721 xe = &xh->xh_entries[count - start];
4722 len = sizeof(struct ocfs2_xattr_entry) * start; 4722 len = sizeof(struct ocfs2_xattr_entry) * start;
@@ -4788,9 +4788,9 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
4788 4788
4789 BUG_ON(s_blkno == t_blkno); 4789 BUG_ON(s_blkno == t_blkno);
4790 4790
4791 mlog(0, "cp bucket %llu to %llu, target is %d\n", 4791 trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
4792 (unsigned long long)s_blkno, (unsigned long long)t_blkno, 4792 (unsigned long long)t_blkno,
4793 t_is_new); 4793 t_is_new);
4794 4794
4795 s_bucket = ocfs2_xattr_bucket_new(inode); 4795 s_bucket = ocfs2_xattr_bucket_new(inode);
4796 t_bucket = ocfs2_xattr_bucket_new(inode); 4796 t_bucket = ocfs2_xattr_bucket_new(inode);
@@ -4862,8 +4862,8 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4862 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4862 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4863 struct ocfs2_xattr_bucket *old_first, *new_first; 4863 struct ocfs2_xattr_bucket *old_first, *new_first;
4864 4864
4865 mlog(0, "mv xattrs from cluster %llu to %llu\n", 4865 trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
4866 (unsigned long long)last_blk, (unsigned long long)to_blk); 4866 (unsigned long long)to_blk);
4867 4867
4868 BUG_ON(start_bucket >= num_buckets); 4868 BUG_ON(start_bucket >= num_buckets);
4869 if (start_bucket) { 4869 if (start_bucket) {
@@ -5013,9 +5013,9 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5013{ 5013{
5014 int ret; 5014 int ret;
5015 5015
5016 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", 5016 trace_ocfs2_adjust_xattr_cross_cluster(
5017 (unsigned long long)bucket_blkno(first), prev_clusters, 5017 (unsigned long long)bucket_blkno(first),
5018 (unsigned long long)new_blk); 5018 (unsigned long long)new_blk, prev_clusters);
5019 5019
5020 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) { 5020 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5021 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 5021 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
@@ -5088,10 +5088,10 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5088 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5088 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5089 struct ocfs2_extent_tree et; 5089 struct ocfs2_extent_tree et;
5090 5090
5091 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " 5091 trace_ocfs2_add_new_xattr_cluster_begin(
5092 "previous xattr blkno = %llu\n", 5092 (unsigned long long)OCFS2_I(inode)->ip_blkno,
5093 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5093 (unsigned long long)bucket_blkno(first),
5094 prev_cpos, (unsigned long long)bucket_blkno(first)); 5094 prev_cpos, prev_clusters);
5095 5095
5096 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); 5096 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5097 5097
@@ -5113,8 +5113,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5113 BUG_ON(num_bits > clusters_to_add); 5113 BUG_ON(num_bits > clusters_to_add);
5114 5114
5115 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 5115 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5116 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", 5116 trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);
5117 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5118 5117
5119 if (bucket_blkno(first) + (prev_clusters * bpc) == block && 5118 if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5120 (prev_clusters + num_bits) << osb->s_clustersize_bits <= 5119 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
@@ -5130,8 +5129,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5130 */ 5129 */
5131 v_start = prev_cpos + prev_clusters; 5130 v_start = prev_cpos + prev_clusters;
5132 *num_clusters = prev_clusters + num_bits; 5131 *num_clusters = prev_clusters + num_bits;
5133 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5134 num_bits);
5135 } else { 5132 } else {
5136 ret = ocfs2_adjust_xattr_cross_cluster(inode, 5133 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5137 handle, 5134 handle,
@@ -5147,8 +5144,8 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5147 } 5144 }
5148 } 5145 }
5149 5146
5150 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 5147 trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
5151 num_bits, (unsigned long long)block, v_start); 5148 v_start, num_bits);
5152 ret = ocfs2_insert_extent(handle, &et, v_start, block, 5149 ret = ocfs2_insert_extent(handle, &et, v_start, block,
5153 num_bits, 0, ctxt->meta_ac); 5150 num_bits, 0, ctxt->meta_ac);
5154 if (ret < 0) { 5151 if (ret < 0) {
@@ -5183,9 +5180,9 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
5183 u64 end_blk; 5180 u64 end_blk;
5184 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets); 5181 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5185 5182
5186 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " 5183 trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
5187 "from %llu, len = %u\n", (unsigned long long)target_blk, 5184 (unsigned long long)bucket_blkno(first),
5188 (unsigned long long)bucket_blkno(first), num_clusters); 5185 num_clusters, new_bucket);
5189 5186
5190 /* The extent must have room for an additional bucket */ 5187 /* The extent must have room for an additional bucket */
5191 BUG_ON(new_bucket >= 5188 BUG_ON(new_bucket >=
@@ -5265,8 +5262,8 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5265 /* The bucket at the front of the extent */ 5262 /* The bucket at the front of the extent */
5266 struct ocfs2_xattr_bucket *first; 5263 struct ocfs2_xattr_bucket *first;
5267 5264
5268 mlog(0, "Add new xattr bucket starting from %llu\n", 5265 trace_ocfs2_add_new_xattr_bucket(
5269 (unsigned long long)bucket_blkno(target)); 5266 (unsigned long long)bucket_blkno(target));
5270 5267
5271 /* The first bucket of the original extent */ 5268 /* The first bucket of the original extent */
5272 first = ocfs2_xattr_bucket_new(inode); 5269 first = ocfs2_xattr_bucket_new(inode);
@@ -5382,8 +5379,8 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5382 * modified something. We have to assume they did, and dirty 5379 * modified something. We have to assume they did, and dirty
5383 * the whole bucket. This leaves us in a consistent state. 5380 * the whole bucket. This leaves us in a consistent state.
5384 */ 5381 */
5385 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", 5382 trace_ocfs2_xattr_bucket_value_truncate(
5386 xe_off, (unsigned long long)bucket_blkno(bucket), len); 5383 (unsigned long long)bucket_blkno(bucket), xe_off, len);
5387 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt); 5384 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5388 if (ret) { 5385 if (ret) {
5389 mlog_errno(ret); 5386 mlog_errno(ret);
@@ -5433,8 +5430,9 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
5433 5430
5434 ocfs2_init_dealloc_ctxt(&dealloc); 5431 ocfs2_init_dealloc_ctxt(&dealloc);
5435 5432
5436 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", 5433 trace_ocfs2_rm_xattr_cluster(
5437 cpos, len, (unsigned long long)blkno); 5434 (unsigned long long)OCFS2_I(inode)->ip_blkno,
5435 (unsigned long long)blkno, cpos, len);
5438 5436
5439 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, 5437 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5440 len); 5438 len);
@@ -5538,7 +5536,7 @@ static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5538 int ret; 5536 int ret;
5539 struct ocfs2_xa_loc loc; 5537 struct ocfs2_xa_loc loc;
5540 5538
5541 mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name); 5539 trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5542 5540
5543 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket, 5541 ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5544 xs->not_found ? NULL : xs->here); 5542 xs->not_found ? NULL : xs->here);
@@ -5570,7 +5568,6 @@ static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5570 5568
5571 5569
5572out: 5570out:
5573 mlog_exit(ret);
5574 return ret; 5571 return ret;
5575} 5572}
5576 5573
@@ -5581,7 +5578,7 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5581{ 5578{
5582 int ret; 5579 int ret;
5583 5580
5584 mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name); 5581 trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
5585 5582
5586 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt); 5583 ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5587 if (!ret) 5584 if (!ret)
@@ -5637,7 +5634,6 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5637 mlog_errno(ret); 5634 mlog_errno(ret);
5638 5635
5639out: 5636out:
5640 mlog_exit(ret);
5641 return ret; 5637 return ret;
5642} 5638}
5643 5639
@@ -6041,9 +6037,9 @@ static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6041 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) 6037 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6042 p = &refcount; 6038 p = &refcount;
6043 6039
6044 mlog(0, "refcount bucket %llu, count = %u\n", 6040 trace_ocfs2_xattr_bucket_value_refcount(
6045 (unsigned long long)bucket_blkno(bucket), 6041 (unsigned long long)bucket_blkno(bucket),
6046 le16_to_cpu(xh->xh_count)); 6042 le16_to_cpu(xh->xh_count));
6047 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 6043 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6048 xe = &xh->xh_entries[i]; 6044 xe = &xh->xh_entries[i];
6049 6045
@@ -6339,8 +6335,8 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
6339 u32 clusters, cpos, p_cluster, num_clusters; 6335 u32 clusters, cpos, p_cluster, num_clusters;
6340 unsigned int ext_flags = 0; 6336 unsigned int ext_flags = 0;
6341 6337
6342 mlog(0, "reflink xattr in container %llu, count = %u\n", 6338 trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6343 (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count)); 6339 le16_to_cpu(xh->xh_count));
6344 6340
6345 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; 6341 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6346 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { 6342 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
@@ -6540,8 +6536,8 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
6540 goto out; 6536 goto out;
6541 } 6537 }
6542 6538
6543 mlog(0, "create new xattr block for inode %llu, index = %d\n", 6539 trace_ocfs2_create_empty_xattr_block(
6544 (unsigned long long)fe_bh->b_blocknr, indexed); 6540 (unsigned long long)fe_bh->b_blocknr, indexed);
6545 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed, 6541 ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6546 ret_bh); 6542 ret_bh);
6547 if (ret) 6543 if (ret)
@@ -6952,8 +6948,8 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6952 if (ret) 6948 if (ret)
6953 mlog_errno(ret); 6949 mlog_errno(ret);
6954 6950
6955 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", 6951 trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
6956 (unsigned long long)new_blkno, num_clusters, reflink_cpos); 6952 num_clusters, reflink_cpos);
6957 6953
6958 len -= num_clusters; 6954 len -= num_clusters;
6959 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 6955 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
@@ -6982,8 +6978,7 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6982 struct ocfs2_alloc_context *data_ac = NULL; 6978 struct ocfs2_alloc_context *data_ac = NULL;
6983 struct ocfs2_extent_tree et; 6979 struct ocfs2_extent_tree et;
6984 6980
6985 mlog(0, "reflink xattr buckets %llu len %u\n", 6981 trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
6986 (unsigned long long)blkno, len);
6987 6982
6988 ocfs2_init_xattr_tree_extent_tree(&et, 6983 ocfs2_init_xattr_tree_extent_tree(&et,
6989 INODE_CACHE(args->reflink->new_inode), 6984 INODE_CACHE(args->reflink->new_inode),
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 8a6d34fa668a..d738a7e493dd 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -372,7 +372,6 @@ const struct address_space_operations omfs_aops = {
372 .readpages = omfs_readpages, 372 .readpages = omfs_readpages,
373 .writepage = omfs_writepage, 373 .writepage = omfs_writepage,
374 .writepages = omfs_writepages, 374 .writepages = omfs_writepages,
375 .sync_page = block_sync_page,
376 .write_begin = omfs_write_begin, 375 .write_begin = omfs_write_begin,
377 .write_end = generic_write_end, 376 .write_end = generic_write_end,
378 .bmap = omfs_bmap, 377 .bmap = omfs_bmap,
diff --git a/fs/open.c b/fs/open.c
index f83ca80cc59a..b52cf013ffa1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -835,17 +835,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
835 835
836 validate_creds(cred); 836 validate_creds(cred);
837 837
838 /* 838 /* We must always pass in a valid mount pointer. */
839 * We must always pass in a valid mount pointer. Historically 839 BUG_ON(!mnt);
840 * callers got away with not passing it, but we must enforce this at
841 * the earliest possible point now to avoid strange problems deep in the
842 * filesystem stack.
843 */
844 if (!mnt) {
845 printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__);
846 dump_stack();
847 return ERR_PTR(-EINVAL);
848 }
849 840
850 error = -ENFILE; 841 error = -ENFILE;
851 f = get_empty_filp(); 842 f = get_empty_filp();
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9c21119512b9..ac546975031f 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -290,7 +290,8 @@ ssize_t part_inflight_show(struct device *dev,
290{ 290{
291 struct hd_struct *p = dev_to_part(dev); 291 struct hd_struct *p = dev_to_part(dev);
292 292
293 return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); 293 return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
294 atomic_read(&p->in_flight[1]));
294} 295}
295 296
296#ifdef CONFIG_FAIL_MAKE_REQUEST 297#ifdef CONFIG_FAIL_MAKE_REQUEST
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7c99c1cf7e5c..5e4f776b0917 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -489,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
489 vsize, 489 vsize,
490 mm ? get_mm_rss(mm) : 0, 490 mm ? get_mm_rss(mm) : 0,
491 rsslim, 491 rsslim,
492 mm ? mm->start_code : 0, 492 mm ? (permitted ? mm->start_code : 1) : 0,
493 mm ? mm->end_code : 0, 493 mm ? (permitted ? mm->end_code : 1) : 0,
494 (permitted && mm) ? mm->start_stack : 0, 494 (permitted && mm) ? mm->start_stack : 0,
495 esp, 495 esp,
496 eip, 496 eip,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d49c4b5d2c3e..5a670c11aeac 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -191,17 +191,20 @@ static int proc_root_link(struct inode *inode, struct path *path)
191 return result; 191 return result;
192} 192}
193 193
194/* 194static struct mm_struct *__check_mem_permission(struct task_struct *task)
195 * Return zero if current may access user memory in @task, -error if not.
196 */
197static int check_mem_permission(struct task_struct *task)
198{ 195{
196 struct mm_struct *mm;
197
198 mm = get_task_mm(task);
199 if (!mm)
200 return ERR_PTR(-EINVAL);
201
199 /* 202 /*
200 * A task can always look at itself, in case it chooses 203 * A task can always look at itself, in case it chooses
201 * to use system calls instead of load instructions. 204 * to use system calls instead of load instructions.
202 */ 205 */
203 if (task == current) 206 if (task == current)
204 return 0; 207 return mm;
205 208
206 /* 209 /*
207 * If current is actively ptrace'ing, and would also be 210 * If current is actively ptrace'ing, and would also be
@@ -213,27 +216,53 @@ static int check_mem_permission(struct task_struct *task)
213 match = (tracehook_tracer_task(task) == current); 216 match = (tracehook_tracer_task(task) == current);
214 rcu_read_unlock(); 217 rcu_read_unlock();
215 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 218 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
216 return 0; 219 return mm;
217 } 220 }
218 221
219 /* 222 /*
220 * Noone else is allowed. 223 * Noone else is allowed.
221 */ 224 */
222 return -EPERM; 225 mmput(mm);
226 return ERR_PTR(-EPERM);
227}
228
229/*
230 * If current may access user memory in @task return a reference to the
231 * corresponding mm, otherwise ERR_PTR.
232 */
233static struct mm_struct *check_mem_permission(struct task_struct *task)
234{
235 struct mm_struct *mm;
236 int err;
237
238 /*
239 * Avoid racing if task exec's as we might get a new mm but validate
240 * against old credentials.
241 */
242 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
243 if (err)
244 return ERR_PTR(err);
245
246 mm = __check_mem_permission(task);
247 mutex_unlock(&task->signal->cred_guard_mutex);
248
249 return mm;
223} 250}
224 251
225struct mm_struct *mm_for_maps(struct task_struct *task) 252struct mm_struct *mm_for_maps(struct task_struct *task)
226{ 253{
227 struct mm_struct *mm; 254 struct mm_struct *mm;
255 int err;
228 256
229 if (mutex_lock_killable(&task->signal->cred_guard_mutex)) 257 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
230 return NULL; 258 if (err)
259 return ERR_PTR(err);
231 260
232 mm = get_task_mm(task); 261 mm = get_task_mm(task);
233 if (mm && mm != current->mm && 262 if (mm && mm != current->mm &&
234 !ptrace_may_access(task, PTRACE_MODE_READ)) { 263 !ptrace_may_access(task, PTRACE_MODE_READ)) {
235 mmput(mm); 264 mmput(mm);
236 mm = NULL; 265 mm = ERR_PTR(-EACCES);
237 } 266 }
238 mutex_unlock(&task->signal->cred_guard_mutex); 267 mutex_unlock(&task->signal->cred_guard_mutex);
239 268
@@ -279,9 +308,9 @@ out:
279 308
280static int proc_pid_auxv(struct task_struct *task, char *buffer) 309static int proc_pid_auxv(struct task_struct *task, char *buffer)
281{ 310{
282 int res = 0; 311 struct mm_struct *mm = mm_for_maps(task);
283 struct mm_struct *mm = get_task_mm(task); 312 int res = PTR_ERR(mm);
284 if (mm) { 313 if (mm && !IS_ERR(mm)) {
285 unsigned int nwords = 0; 314 unsigned int nwords = 0;
286 do { 315 do {
287 nwords += 2; 316 nwords += 2;
@@ -318,6 +347,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
318} 347}
319#endif /* CONFIG_KALLSYMS */ 348#endif /* CONFIG_KALLSYMS */
320 349
350static int lock_trace(struct task_struct *task)
351{
352 int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
353 if (err)
354 return err;
355 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
356 mutex_unlock(&task->signal->cred_guard_mutex);
357 return -EPERM;
358 }
359 return 0;
360}
361
362static void unlock_trace(struct task_struct *task)
363{
364 mutex_unlock(&task->signal->cred_guard_mutex);
365}
366
321#ifdef CONFIG_STACKTRACE 367#ifdef CONFIG_STACKTRACE
322 368
323#define MAX_STACK_TRACE_DEPTH 64 369#define MAX_STACK_TRACE_DEPTH 64
@@ -327,6 +373,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
327{ 373{
328 struct stack_trace trace; 374 struct stack_trace trace;
329 unsigned long *entries; 375 unsigned long *entries;
376 int err;
330 int i; 377 int i;
331 378
332 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 379 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
@@ -337,15 +384,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
337 trace.max_entries = MAX_STACK_TRACE_DEPTH; 384 trace.max_entries = MAX_STACK_TRACE_DEPTH;
338 trace.entries = entries; 385 trace.entries = entries;
339 trace.skip = 0; 386 trace.skip = 0;
340 save_stack_trace_tsk(task, &trace);
341 387
342 for (i = 0; i < trace.nr_entries; i++) { 388 err = lock_trace(task);
343 seq_printf(m, "[<%p>] %pS\n", 389 if (!err) {
344 (void *)entries[i], (void *)entries[i]); 390 save_stack_trace_tsk(task, &trace);
391
392 for (i = 0; i < trace.nr_entries; i++) {
393 seq_printf(m, "[<%pK>] %pS\n",
394 (void *)entries[i], (void *)entries[i]);
395 }
396 unlock_trace(task);
345 } 397 }
346 kfree(entries); 398 kfree(entries);
347 399
348 return 0; 400 return err;
349} 401}
350#endif 402#endif
351 403
@@ -508,18 +560,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
508{ 560{
509 long nr; 561 long nr;
510 unsigned long args[6], sp, pc; 562 unsigned long args[6], sp, pc;
563 int res = lock_trace(task);
564 if (res)
565 return res;
511 566
512 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 567 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
513 return sprintf(buffer, "running\n"); 568 res = sprintf(buffer, "running\n");
514 569 else if (nr < 0)
515 if (nr < 0) 570 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
516 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 571 else
517 572 res = sprintf(buffer,
518 return sprintf(buffer,
519 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 573 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
520 nr, 574 nr,
521 args[0], args[1], args[2], args[3], args[4], args[5], 575 args[0], args[1], args[2], args[3], args[4], args[5],
522 sp, pc); 576 sp, pc);
577 unlock_trace(task);
578 return res;
523} 579}
524#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 580#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
525 581
@@ -775,18 +831,14 @@ static ssize_t mem_read(struct file * file, char __user * buf,
775 if (!task) 831 if (!task)
776 goto out_no_task; 832 goto out_no_task;
777 833
778 if (check_mem_permission(task))
779 goto out;
780
781 ret = -ENOMEM; 834 ret = -ENOMEM;
782 page = (char *)__get_free_page(GFP_TEMPORARY); 835 page = (char *)__get_free_page(GFP_TEMPORARY);
783 if (!page) 836 if (!page)
784 goto out; 837 goto out;
785 838
786 ret = 0; 839 mm = check_mem_permission(task);
787 840 ret = PTR_ERR(mm);
788 mm = get_task_mm(task); 841 if (IS_ERR(mm))
789 if (!mm)
790 goto out_free; 842 goto out_free;
791 843
792 ret = -EIO; 844 ret = -EIO;
@@ -800,8 +852,8 @@ static ssize_t mem_read(struct file * file, char __user * buf,
800 int this_len, retval; 852 int this_len, retval;
801 853
802 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 854 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
803 retval = access_process_vm(task, src, page, this_len, 0); 855 retval = access_remote_vm(mm, src, page, this_len, 0);
804 if (!retval || check_mem_permission(task)) { 856 if (!retval) {
805 if (!ret) 857 if (!ret)
806 ret = -EIO; 858 ret = -EIO;
807 break; 859 break;
@@ -829,10 +881,6 @@ out_no_task:
829 return ret; 881 return ret;
830} 882}
831 883
832#define mem_write NULL
833
834#ifndef mem_write
835/* This is a security hazard */
836static ssize_t mem_write(struct file * file, const char __user *buf, 884static ssize_t mem_write(struct file * file, const char __user *buf,
837 size_t count, loff_t *ppos) 885 size_t count, loff_t *ppos)
838{ 886{
@@ -840,18 +888,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
840 char *page; 888 char *page;
841 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 889 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
842 unsigned long dst = *ppos; 890 unsigned long dst = *ppos;
891 struct mm_struct *mm;
843 892
844 copied = -ESRCH; 893 copied = -ESRCH;
845 if (!task) 894 if (!task)
846 goto out_no_task; 895 goto out_no_task;
847 896
848 if (check_mem_permission(task)) 897 mm = check_mem_permission(task);
849 goto out; 898 copied = PTR_ERR(mm);
899 if (IS_ERR(mm))
900 goto out_task;
901
902 copied = -EIO;
903 if (file->private_data != (void *)((long)current->self_exec_id))
904 goto out_mm;
850 905
851 copied = -ENOMEM; 906 copied = -ENOMEM;
852 page = (char *)__get_free_page(GFP_TEMPORARY); 907 page = (char *)__get_free_page(GFP_TEMPORARY);
853 if (!page) 908 if (!page)
854 goto out; 909 goto out_mm;
855 910
856 copied = 0; 911 copied = 0;
857 while (count > 0) { 912 while (count > 0) {
@@ -862,7 +917,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
862 copied = -EFAULT; 917 copied = -EFAULT;
863 break; 918 break;
864 } 919 }
865 retval = access_process_vm(task, dst, page, this_len, 1); 920 retval = access_remote_vm(mm, dst, page, this_len, 1);
866 if (!retval) { 921 if (!retval) {
867 if (!copied) 922 if (!copied)
868 copied = -EIO; 923 copied = -EIO;
@@ -875,12 +930,13 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
875 } 930 }
876 *ppos = dst; 931 *ppos = dst;
877 free_page((unsigned long) page); 932 free_page((unsigned long) page);
878out: 933out_mm:
934 mmput(mm);
935out_task:
879 put_task_struct(task); 936 put_task_struct(task);
880out_no_task: 937out_no_task:
881 return copied; 938 return copied;
882} 939}
883#endif
884 940
885loff_t mem_lseek(struct file *file, loff_t offset, int orig) 941loff_t mem_lseek(struct file *file, loff_t offset, int orig)
886{ 942{
@@ -917,20 +973,18 @@ static ssize_t environ_read(struct file *file, char __user *buf,
917 if (!task) 973 if (!task)
918 goto out_no_task; 974 goto out_no_task;
919 975
920 if (!ptrace_may_access(task, PTRACE_MODE_READ))
921 goto out;
922
923 ret = -ENOMEM; 976 ret = -ENOMEM;
924 page = (char *)__get_free_page(GFP_TEMPORARY); 977 page = (char *)__get_free_page(GFP_TEMPORARY);
925 if (!page) 978 if (!page)
926 goto out; 979 goto out;
927 980
928 ret = 0;
929 981
930 mm = get_task_mm(task); 982 mm = mm_for_maps(task);
931 if (!mm) 983 ret = PTR_ERR(mm);
984 if (!mm || IS_ERR(mm))
932 goto out_free; 985 goto out_free;
933 986
987 ret = 0;
934 while (count > 0) { 988 while (count > 0) {
935 int this_len, retval, max_len; 989 int this_len, retval, max_len;
936 990
@@ -2748,8 +2802,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2748static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2802static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2749 struct pid *pid, struct task_struct *task) 2803 struct pid *pid, struct task_struct *task)
2750{ 2804{
2751 seq_printf(m, "%08x\n", task->personality); 2805 int err = lock_trace(task);
2752 return 0; 2806 if (!err) {
2807 seq_printf(m, "%08x\n", task->personality);
2808 unlock_trace(task);
2809 }
2810 return err;
2753} 2811}
2754 2812
2755/* 2813/*
@@ -2768,7 +2826,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2768 REG("environ", S_IRUSR, proc_environ_operations), 2826 REG("environ", S_IRUSR, proc_environ_operations),
2769 INF("auxv", S_IRUSR, proc_pid_auxv), 2827 INF("auxv", S_IRUSR, proc_pid_auxv),
2770 ONE("status", S_IRUGO, proc_pid_status), 2828 ONE("status", S_IRUGO, proc_pid_status),
2771 ONE("personality", S_IRUSR, proc_pid_personality), 2829 ONE("personality", S_IRUGO, proc_pid_personality),
2772 INF("limits", S_IRUGO, proc_pid_limits), 2830 INF("limits", S_IRUGO, proc_pid_limits),
2773#ifdef CONFIG_SCHED_DEBUG 2831#ifdef CONFIG_SCHED_DEBUG
2774 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2832 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2778,7 +2836,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2778#endif 2836#endif
2779 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2837 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2780#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2838#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2781 INF("syscall", S_IRUSR, proc_pid_syscall), 2839 INF("syscall", S_IRUGO, proc_pid_syscall),
2782#endif 2840#endif
2783 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2841 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2784 ONE("stat", S_IRUGO, proc_tgid_stat), 2842 ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2797,7 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2797#ifdef CONFIG_PROC_PAGE_MONITOR 2855#ifdef CONFIG_PROC_PAGE_MONITOR
2798 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2856 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2799 REG("smaps", S_IRUGO, proc_smaps_operations), 2857 REG("smaps", S_IRUGO, proc_smaps_operations),
2800 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2858 REG("pagemap", S_IRUGO, proc_pagemap_operations),
2801#endif 2859#endif
2802#ifdef CONFIG_SECURITY 2860#ifdef CONFIG_SECURITY
2803 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2861 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2806,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2806 INF("wchan", S_IRUGO, proc_pid_wchan), 2864 INF("wchan", S_IRUGO, proc_pid_wchan),
2807#endif 2865#endif
2808#ifdef CONFIG_STACKTRACE 2866#ifdef CONFIG_STACKTRACE
2809 ONE("stack", S_IRUSR, proc_pid_stack), 2867 ONE("stack", S_IRUGO, proc_pid_stack),
2810#endif 2868#endif
2811#ifdef CONFIG_SCHEDSTATS 2869#ifdef CONFIG_SCHEDSTATS
2812 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2870 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3108,14 +3166,14 @@ static const struct pid_entry tid_base_stuff[] = {
3108 REG("environ", S_IRUSR, proc_environ_operations), 3166 REG("environ", S_IRUSR, proc_environ_operations),
3109 INF("auxv", S_IRUSR, proc_pid_auxv), 3167 INF("auxv", S_IRUSR, proc_pid_auxv),
3110 ONE("status", S_IRUGO, proc_pid_status), 3168 ONE("status", S_IRUGO, proc_pid_status),
3111 ONE("personality", S_IRUSR, proc_pid_personality), 3169 ONE("personality", S_IRUGO, proc_pid_personality),
3112 INF("limits", S_IRUGO, proc_pid_limits), 3170 INF("limits", S_IRUGO, proc_pid_limits),
3113#ifdef CONFIG_SCHED_DEBUG 3171#ifdef CONFIG_SCHED_DEBUG
3114 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3172 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3115#endif 3173#endif
3116 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3174 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3117#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3175#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3118 INF("syscall", S_IRUSR, proc_pid_syscall), 3176 INF("syscall", S_IRUGO, proc_pid_syscall),
3119#endif 3177#endif
3120 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3178 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3121 ONE("stat", S_IRUGO, proc_tid_stat), 3179 ONE("stat", S_IRUGO, proc_tid_stat),
@@ -3133,7 +3191,7 @@ static const struct pid_entry tid_base_stuff[] = {
3133#ifdef CONFIG_PROC_PAGE_MONITOR 3191#ifdef CONFIG_PROC_PAGE_MONITOR
3134 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3192 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3135 REG("smaps", S_IRUGO, proc_smaps_operations), 3193 REG("smaps", S_IRUGO, proc_smaps_operations),
3136 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3194 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3137#endif 3195#endif
3138#ifdef CONFIG_SECURITY 3196#ifdef CONFIG_SECURITY
3139 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3197 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -3142,7 +3200,7 @@ static const struct pid_entry tid_base_stuff[] = {
3142 INF("wchan", S_IRUGO, proc_pid_wchan), 3200 INF("wchan", S_IRUGO, proc_pid_wchan),
3143#endif 3201#endif
3144#ifdef CONFIG_STACKTRACE 3202#ifdef CONFIG_STACKTRACE
3145 ONE("stack", S_IRUSR, proc_pid_stack), 3203 ONE("stack", S_IRUGO, proc_pid_stack),
3146#endif 3204#endif
3147#ifdef CONFIG_SCHEDSTATS 3205#ifdef CONFIG_SCHEDSTATS
3148 INF("schedstat", S_IRUGO, proc_pid_schedstat), 3206 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3161,7 +3219,7 @@ static const struct pid_entry tid_base_stuff[] = {
3161 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3219 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3162#ifdef CONFIG_AUDITSYSCALL 3220#ifdef CONFIG_AUDITSYSCALL
3163 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3221 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
3164 REG("sessionid", S_IRUSR, proc_sessionid_operations), 3222 REG("sessionid", S_IRUGO, proc_sessionid_operations),
3165#endif 3223#endif
3166#ifdef CONFIG_FAULT_INJECTION 3224#ifdef CONFIG_FAULT_INJECTION
3167 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3225 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 01e07f2a188f..f1281339b6fa 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -28,7 +28,7 @@
28 28
29DEFINE_SPINLOCK(proc_subdir_lock); 29DEFINE_SPINLOCK(proc_subdir_lock);
30 30
31static int proc_match(int len, const char *name, struct proc_dir_entry *de) 31static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
32{ 32{
33 if (de->namelen != len) 33 if (de->namelen != len)
34 return 0; 34 return 0;
@@ -303,7 +303,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
303{ 303{
304 const char *cp = name, *next; 304 const char *cp = name, *next;
305 struct proc_dir_entry *de; 305 struct proc_dir_entry *de;
306 int len; 306 unsigned int len;
307 307
308 de = *ret; 308 de = *ret;
309 if (!de) 309 if (!de)
@@ -602,7 +602,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
602{ 602{
603 struct proc_dir_entry *ent = NULL; 603 struct proc_dir_entry *ent = NULL;
604 const char *fn = name; 604 const char *fn = name;
605 int len; 605 unsigned int len;
606 606
607 /* make sure name is valid */ 607 /* make sure name is valid */
608 if (!name || !strlen(name)) goto out; 608 if (!name || !strlen(name)) goto out;
@@ -786,7 +786,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
786 struct proc_dir_entry **p; 786 struct proc_dir_entry **p;
787 struct proc_dir_entry *de = NULL; 787 struct proc_dir_entry *de = NULL;
788 const char *fn = name; 788 const char *fn = name;
789 int len; 789 unsigned int len;
790 790
791 spin_lock(&proc_subdir_lock); 791 spin_lock(&proc_subdir_lock);
792 if (__xlate_proc_name(name, &parent, &fn) != 0) { 792 if (__xlate_proc_name(name, &parent, &fn) != 0) {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d6a7ca1fdac5..d15aa1b1cc8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -46,8 +46,6 @@ static void proc_evict_inode(struct inode *inode)
46 } 46 }
47} 47}
48 48
49struct vfsmount *proc_mnt;
50
51static struct kmem_cache * proc_inode_cachep; 49static struct kmem_cache * proc_inode_cachep;
52 50
53static struct inode *proc_alloc_inode(struct super_block *sb) 51static struct inode *proc_alloc_inode(struct super_block *sb)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 9ad561ded409..c03e8d3a3a5b 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -107,7 +107,6 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
107} 107}
108void pde_put(struct proc_dir_entry *pde); 108void pde_put(struct proc_dir_entry *pde);
109 109
110extern struct vfsmount *proc_mnt;
111int proc_fill_super(struct super_block *); 110int proc_fill_super(struct super_block *);
112struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 111struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
113 112
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef9fa8e24ad6..a9000e9cfee5 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -43,17 +43,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
43 struct pid_namespace *ns; 43 struct pid_namespace *ns;
44 struct proc_inode *ei; 44 struct proc_inode *ei;
45 45
46 if (proc_mnt) {
47 /* Seed the root directory with a pid so it doesn't need
48 * to be special in base.c. I would do this earlier but
49 * the only task alive when /proc is mounted the first time
50 * is the init_task and it doesn't have any pids.
51 */
52 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
53 if (!ei->pid)
54 ei->pid = find_get_pid(1);
55 }
56
57 if (flags & MS_KERNMOUNT) 46 if (flags & MS_KERNMOUNT)
58 ns = (struct pid_namespace *)data; 47 ns = (struct pid_namespace *)data;
59 else 48 else
@@ -71,16 +60,16 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
71 return ERR_PTR(err); 60 return ERR_PTR(err);
72 } 61 }
73 62
74 ei = PROC_I(sb->s_root->d_inode);
75 if (!ei->pid) {
76 rcu_read_lock();
77 ei->pid = get_pid(find_pid_ns(1, ns));
78 rcu_read_unlock();
79 }
80
81 sb->s_flags |= MS_ACTIVE; 63 sb->s_flags |= MS_ACTIVE;
82 } 64 }
83 65
66 ei = PROC_I(sb->s_root->d_inode);
67 if (!ei->pid) {
68 rcu_read_lock();
69 ei->pid = get_pid(find_pid_ns(1, ns));
70 rcu_read_unlock();
71 }
72
84 return dget(sb->s_root); 73 return dget(sb->s_root);
85} 74}
86 75
@@ -101,19 +90,20 @@ static struct file_system_type proc_fs_type = {
101 90
102void __init proc_root_init(void) 91void __init proc_root_init(void)
103{ 92{
93 struct vfsmount *mnt;
104 int err; 94 int err;
105 95
106 proc_init_inodecache(); 96 proc_init_inodecache();
107 err = register_filesystem(&proc_fs_type); 97 err = register_filesystem(&proc_fs_type);
108 if (err) 98 if (err)
109 return; 99 return;
110 proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 100 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
111 if (IS_ERR(proc_mnt)) { 101 if (IS_ERR(mnt)) {
112 unregister_filesystem(&proc_fs_type); 102 unregister_filesystem(&proc_fs_type);
113 return; 103 return;
114 } 104 }
115 105
116 init_pid_ns.proc_mnt = proc_mnt; 106 init_pid_ns.proc_mnt = mnt;
117 proc_symlink("mounts", NULL, "self/mounts"); 107 proc_symlink("mounts", NULL, "self/mounts");
118 108
119 proc_net_init(); 109 proc_net_init();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 60b914860f81..2e7addfd9803 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,6 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/hugetlb.h> 2#include <linux/hugetlb.h>
3#include <linux/huge_mm.h>
3#include <linux/mount.h> 4#include <linux/mount.h>
4#include <linux/seq_file.h> 5#include <linux/seq_file.h>
5#include <linux/highmem.h> 6#include <linux/highmem.h>
@@ -7,6 +8,7 @@
7#include <linux/slab.h> 8#include <linux/slab.h>
8#include <linux/pagemap.h> 9#include <linux/pagemap.h>
9#include <linux/mempolicy.h> 10#include <linux/mempolicy.h>
11#include <linux/rmap.h>
10#include <linux/swap.h> 12#include <linux/swap.h>
11#include <linux/swapops.h> 13#include <linux/swapops.h>
12 14
@@ -119,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos)
119 121
120 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
121 if (!priv->task) 123 if (!priv->task)
122 return NULL; 124 return ERR_PTR(-ESRCH);
123 125
124 mm = mm_for_maps(priv->task); 126 mm = mm_for_maps(priv->task);
125 if (!mm) 127 if (!mm || IS_ERR(mm))
126 return NULL; 128 return mm;
127 down_read(&mm->mmap_sem); 129 down_read(&mm->mmap_sem);
128 130
129 tail_vma = get_gate_vma(priv->task); 131 tail_vma = get_gate_vma(priv->task->mm);
130 priv->tail_vma = tail_vma; 132 priv->tail_vma = tail_vma;
131 133
132 /* Start with last addr hint */ 134 /* Start with last addr hint */
@@ -180,7 +182,8 @@ static void m_stop(struct seq_file *m, void *v)
180 struct proc_maps_private *priv = m->private; 182 struct proc_maps_private *priv = m->private;
181 struct vm_area_struct *vma = v; 183 struct vm_area_struct *vma = v;
182 184
183 vma_stop(priv, vma); 185 if (!IS_ERR(vma))
186 vma_stop(priv, vma);
184 if (priv->task) 187 if (priv->task)
185 put_task_struct(priv->task); 188 put_task_struct(priv->task);
186} 189}
@@ -249,8 +252,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
249 const char *name = arch_vma_name(vma); 252 const char *name = arch_vma_name(vma);
250 if (!name) { 253 if (!name) {
251 if (mm) { 254 if (mm) {
252 if (vma->vm_start <= mm->start_brk && 255 if (vma->vm_start <= mm->brk &&
253 vma->vm_end >= mm->brk) { 256 vma->vm_end >= mm->start_brk) {
254 name = "[heap]"; 257 name = "[heap]";
255 } else if (vma->vm_start <= mm->start_stack && 258 } else if (vma->vm_start <= mm->start_stack &&
256 vma->vm_end >= mm->start_stack) { 259 vma->vm_end >= mm->start_stack) {
@@ -277,7 +280,8 @@ static int show_map(struct seq_file *m, void *v)
277 show_map_vma(m, vma); 280 show_map_vma(m, vma);
278 281
279 if (m->count < m->size) /* vma is copied successfully */ 282 if (m->count < m->size) /* vma is copied successfully */
280 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; 283 m->version = (vma != get_gate_vma(task->mm))
284 ? vma->vm_start : 0;
281 return 0; 285 return 0;
282} 286}
283 287
@@ -329,58 +333,86 @@ struct mem_size_stats {
329 unsigned long private_dirty; 333 unsigned long private_dirty;
330 unsigned long referenced; 334 unsigned long referenced;
331 unsigned long anonymous; 335 unsigned long anonymous;
336 unsigned long anonymous_thp;
332 unsigned long swap; 337 unsigned long swap;
333 u64 pss; 338 u64 pss;
334}; 339};
335 340
336static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 341
337 struct mm_walk *walk) 342static void smaps_pte_entry(pte_t ptent, unsigned long addr,
343 unsigned long ptent_size, struct mm_walk *walk)
338{ 344{
339 struct mem_size_stats *mss = walk->private; 345 struct mem_size_stats *mss = walk->private;
340 struct vm_area_struct *vma = mss->vma; 346 struct vm_area_struct *vma = mss->vma;
341 pte_t *pte, ptent;
342 spinlock_t *ptl;
343 struct page *page; 347 struct page *page;
344 int mapcount; 348 int mapcount;
345 349
346 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 350 if (is_swap_pte(ptent)) {
347 for (; addr != end; pte++, addr += PAGE_SIZE) { 351 mss->swap += ptent_size;
348 ptent = *pte; 352 return;
349 353 }
350 if (is_swap_pte(ptent)) {
351 mss->swap += PAGE_SIZE;
352 continue;
353 }
354 354
355 if (!pte_present(ptent)) 355 if (!pte_present(ptent))
356 continue; 356 return;
357
358 page = vm_normal_page(vma, addr, ptent);
359 if (!page)
360 return;
361
362 if (PageAnon(page))
363 mss->anonymous += ptent_size;
364
365 mss->resident += ptent_size;
366 /* Accumulate the size in pages that have been accessed. */
367 if (pte_young(ptent) || PageReferenced(page))
368 mss->referenced += ptent_size;
369 mapcount = page_mapcount(page);
370 if (mapcount >= 2) {
371 if (pte_dirty(ptent) || PageDirty(page))
372 mss->shared_dirty += ptent_size;
373 else
374 mss->shared_clean += ptent_size;
375 mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
376 } else {
377 if (pte_dirty(ptent) || PageDirty(page))
378 mss->private_dirty += ptent_size;
379 else
380 mss->private_clean += ptent_size;
381 mss->pss += (ptent_size << PSS_SHIFT);
382 }
383}
357 384
358 page = vm_normal_page(vma, addr, ptent); 385static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
359 if (!page) 386 struct mm_walk *walk)
360 continue; 387{
388 struct mem_size_stats *mss = walk->private;
389 struct vm_area_struct *vma = mss->vma;
390 pte_t *pte;
391 spinlock_t *ptl;
361 392
362 if (PageAnon(page)) 393 spin_lock(&walk->mm->page_table_lock);
363 mss->anonymous += PAGE_SIZE; 394 if (pmd_trans_huge(*pmd)) {
364 395 if (pmd_trans_splitting(*pmd)) {
365 mss->resident += PAGE_SIZE; 396 spin_unlock(&walk->mm->page_table_lock);
366 /* Accumulate the size in pages that have been accessed. */ 397 wait_split_huge_page(vma->anon_vma, pmd);
367 if (pte_young(ptent) || PageReferenced(page))
368 mss->referenced += PAGE_SIZE;
369 mapcount = page_mapcount(page);
370 if (mapcount >= 2) {
371 if (pte_dirty(ptent) || PageDirty(page))
372 mss->shared_dirty += PAGE_SIZE;
373 else
374 mss->shared_clean += PAGE_SIZE;
375 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
376 } else { 398 } else {
377 if (pte_dirty(ptent) || PageDirty(page)) 399 smaps_pte_entry(*(pte_t *)pmd, addr,
378 mss->private_dirty += PAGE_SIZE; 400 HPAGE_PMD_SIZE, walk);
379 else 401 spin_unlock(&walk->mm->page_table_lock);
380 mss->private_clean += PAGE_SIZE; 402 mss->anonymous_thp += HPAGE_PMD_SIZE;
381 mss->pss += (PAGE_SIZE << PSS_SHIFT); 403 return 0;
382 } 404 }
405 } else {
406 spin_unlock(&walk->mm->page_table_lock);
383 } 407 }
408 /*
409 * The mmap_sem held all the way back in m_start() is what
410 * keeps khugepaged out of here and from collapsing things
411 * in here.
412 */
413 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
414 for (; addr != end; pte++, addr += PAGE_SIZE)
415 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
384 pte_unmap_unlock(pte - 1, ptl); 416 pte_unmap_unlock(pte - 1, ptl);
385 cond_resched(); 417 cond_resched();
386 return 0; 418 return 0;
@@ -416,6 +448,7 @@ static int show_smap(struct seq_file *m, void *v)
416 "Private_Dirty: %8lu kB\n" 448 "Private_Dirty: %8lu kB\n"
417 "Referenced: %8lu kB\n" 449 "Referenced: %8lu kB\n"
418 "Anonymous: %8lu kB\n" 450 "Anonymous: %8lu kB\n"
451 "AnonHugePages: %8lu kB\n"
419 "Swap: %8lu kB\n" 452 "Swap: %8lu kB\n"
420 "KernelPageSize: %8lu kB\n" 453 "KernelPageSize: %8lu kB\n"
421 "MMUPageSize: %8lu kB\n" 454 "MMUPageSize: %8lu kB\n"
@@ -429,6 +462,7 @@ static int show_smap(struct seq_file *m, void *v)
429 mss.private_dirty >> 10, 462 mss.private_dirty >> 10,
430 mss.referenced >> 10, 463 mss.referenced >> 10,
431 mss.anonymous >> 10, 464 mss.anonymous >> 10,
465 mss.anonymous_thp >> 10,
432 mss.swap >> 10, 466 mss.swap >> 10,
433 vma_kernel_pagesize(vma) >> 10, 467 vma_kernel_pagesize(vma) >> 10,
434 vma_mmu_pagesize(vma) >> 10, 468 vma_mmu_pagesize(vma) >> 10,
@@ -436,7 +470,8 @@ static int show_smap(struct seq_file *m, void *v)
436 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); 470 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
437 471
438 if (m->count < m->size) /* vma is copied successfully */ 472 if (m->count < m->size) /* vma is copied successfully */
439 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; 473 m->version = (vma != get_gate_vma(task->mm))
474 ? vma->vm_start : 0;
440 return 0; 475 return 0;
441} 476}
442 477
@@ -467,6 +502,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
467 spinlock_t *ptl; 502 spinlock_t *ptl;
468 struct page *page; 503 struct page *page;
469 504
505 split_huge_page_pmd(walk->mm, pmd);
506
470 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 507 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
471 for (; addr != end; pte++, addr += PAGE_SIZE) { 508 for (; addr != end; pte++, addr += PAGE_SIZE) {
472 ptent = *pte; 509 ptent = *pte;
@@ -623,6 +660,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
623 pte_t *pte; 660 pte_t *pte;
624 int err = 0; 661 int err = 0;
625 662
663 split_huge_page_pmd(walk->mm, pmd);
664
626 /* find the first VMA at or above 'addr' */ 665 /* find the first VMA at or above 'addr' */
627 vma = find_vma(walk->mm, addr); 666 vma = find_vma(walk->mm, addr);
628 for (; addr != end; addr += PAGE_SIZE) { 667 for (; addr != end; addr += PAGE_SIZE) {
@@ -728,8 +767,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
728 if (!task) 767 if (!task)
729 goto out; 768 goto out;
730 769
731 ret = -EACCES; 770 mm = mm_for_maps(task);
732 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 771 ret = PTR_ERR(mm);
772 if (!mm || IS_ERR(mm))
733 goto out_task; 773 goto out_task;
734 774
735 ret = -EINVAL; 775 ret = -EINVAL;
@@ -742,10 +782,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
742 if (!count) 782 if (!count)
743 goto out_task; 783 goto out_task;
744 784
745 mm = get_task_mm(task);
746 if (!mm)
747 goto out_task;
748
749 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 785 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
750 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 786 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
751 ret = -ENOMEM; 787 ret = -ENOMEM;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index b535d3e5d5f1..980de547c070 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos)
199 /* pin the task and mm whilst we play with them */ 199 /* pin the task and mm whilst we play with them */
200 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 200 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
201 if (!priv->task) 201 if (!priv->task)
202 return NULL; 202 return ERR_PTR(-ESRCH);
203 203
204 mm = mm_for_maps(priv->task); 204 mm = mm_for_maps(priv->task);
205 if (!mm) { 205 if (!mm || IS_ERR(mm)) {
206 put_task_struct(priv->task); 206 put_task_struct(priv->task);
207 priv->task = NULL; 207 priv->task = NULL;
208 return NULL; 208 return mm;
209 } 209 }
210 down_read(&mm->mmap_sem); 210 down_read(&mm->mmap_sem);
211 211
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 08342232cb1c..977ed2723845 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -27,6 +27,7 @@
27#include <linux/string.h> 27#include <linux/string.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/ramfs.h> 29#include <linux/ramfs.h>
30#include <linux/parser.h>
30#include <linux/sched.h> 31#include <linux/sched.h>
31#include <linux/magic.h> 32#include <linux/magic.h>
32#include <linux/pstore.h> 33#include <linux/pstore.h>
@@ -73,11 +74,16 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
73 struct pstore_private *p = dentry->d_inode->i_private; 74 struct pstore_private *p = dentry->d_inode->i_private;
74 75
75 p->erase(p->id); 76 p->erase(p->id);
76 kfree(p);
77 77
78 return simple_unlink(dir, dentry); 78 return simple_unlink(dir, dentry);
79} 79}
80 80
81static void pstore_evict_inode(struct inode *inode)
82{
83 end_writeback(inode);
84 kfree(inode->i_private);
85}
86
81static const struct inode_operations pstore_dir_inode_operations = { 87static const struct inode_operations pstore_dir_inode_operations = {
82 .lookup = simple_lookup, 88 .lookup = simple_lookup,
83 .unlink = pstore_unlink, 89 .unlink = pstore_unlink,
@@ -107,9 +113,52 @@ static struct inode *pstore_get_inode(struct super_block *sb,
107 return inode; 113 return inode;
108} 114}
109 115
116enum {
117 Opt_kmsg_bytes, Opt_err
118};
119
120static const match_table_t tokens = {
121 {Opt_kmsg_bytes, "kmsg_bytes=%u"},
122 {Opt_err, NULL}
123};
124
125static void parse_options(char *options)
126{
127 char *p;
128 substring_t args[MAX_OPT_ARGS];
129 int option;
130
131 if (!options)
132 return;
133
134 while ((p = strsep(&options, ",")) != NULL) {
135 int token;
136
137 if (!*p)
138 continue;
139
140 token = match_token(p, tokens, args);
141 switch (token) {
142 case Opt_kmsg_bytes:
143 if (!match_int(&args[0], &option))
144 pstore_set_kmsg_bytes(option);
145 break;
146 }
147 }
148}
149
150static int pstore_remount(struct super_block *sb, int *flags, char *data)
151{
152 parse_options(data);
153
154 return 0;
155}
156
110static const struct super_operations pstore_ops = { 157static const struct super_operations pstore_ops = {
111 .statfs = simple_statfs, 158 .statfs = simple_statfs,
112 .drop_inode = generic_delete_inode, 159 .drop_inode = generic_delete_inode,
160 .evict_inode = pstore_evict_inode,
161 .remount_fs = pstore_remount,
113 .show_options = generic_show_options, 162 .show_options = generic_show_options,
114}; 163};
115 164
@@ -209,6 +258,8 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
209 sb->s_op = &pstore_ops; 258 sb->s_op = &pstore_ops;
210 sb->s_time_gran = 1; 259 sb->s_time_gran = 1;
211 260
261 parse_options(data);
262
212 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); 263 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
213 if (!inode) { 264 if (!inode) {
214 err = -ENOMEM; 265 err = -ENOMEM;
@@ -252,28 +303,7 @@ static struct file_system_type pstore_fs_type = {
252 303
253static int __init init_pstore_fs(void) 304static int __init init_pstore_fs(void)
254{ 305{
255 int rc = 0; 306 return register_filesystem(&pstore_fs_type);
256 struct kobject *pstorefs_kobj;
257
258 pstorefs_kobj = kobject_create_and_add("pstore", fs_kobj);
259 if (!pstorefs_kobj) {
260 rc = -ENOMEM;
261 goto done;
262 }
263
264 rc = sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
265 if (rc)
266 goto done1;
267
268 rc = register_filesystem(&pstore_fs_type);
269 if (rc == 0)
270 goto done;
271
272 sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
273done1:
274 kobject_put(pstorefs_kobj);
275done:
276 return rc;
277} 307}
278module_init(init_pstore_fs) 308module_init(init_pstore_fs)
279 309
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 76c26d2fab29..8c9f23eb1645 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -1,7 +1,6 @@
1extern void pstore_set_kmsg_bytes(int);
1extern void pstore_get_records(void); 2extern void pstore_get_records(void);
2extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, 3extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
3 char *data, size_t size, 4 char *data, size_t size,
4 struct timespec time, int (*erase)(u64)); 5 struct timespec time, int (*erase)(u64));
5extern int pstore_is_mounted(void); 6extern int pstore_is_mounted(void);
6
7extern struct kobj_attribute pstore_kmsg_bytes_attr;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 705fdf8abf6e..f835a25625ff 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -37,27 +37,21 @@
37static DEFINE_SPINLOCK(pstore_lock); 37static DEFINE_SPINLOCK(pstore_lock);
38static struct pstore_info *psinfo; 38static struct pstore_info *psinfo;
39 39
40/* How much of the console log to snapshot. /sys/fs/pstore/kmsg_bytes */ 40/* How much of the console log to snapshot */
41static unsigned long kmsg_bytes = 10240; 41static unsigned long kmsg_bytes = 10240;
42 42
43static ssize_t b_show(struct kobject *kobj, 43void pstore_set_kmsg_bytes(int bytes)
44 struct kobj_attribute *attr, char *buf)
45{ 44{
46 return snprintf(buf, PAGE_SIZE, "%lu\n", kmsg_bytes); 45 kmsg_bytes = bytes;
47} 46}
48 47
49static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr,
50 const char *buf, size_t count)
51{
52 return (sscanf(buf, "%lu", &kmsg_bytes) > 0) ? count : 0;
53}
54
55struct kobj_attribute pstore_kmsg_bytes_attr =
56 __ATTR(kmsg_bytes, S_IRUGO | S_IWUSR, b_show, b_store);
57
58/* Tag each group of saved records with a sequence number */ 48/* Tag each group of saved records with a sequence number */
59static int oopscount; 49static int oopscount;
60 50
51static char *reason_str[] = {
52 "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency"
53};
54
61/* 55/*
62 * callback from kmsg_dump. (s2,l2) has the most recently 56 * callback from kmsg_dump. (s2,l2) has the most recently
63 * written bytes, older bytes are in (s1,l1). Save as much 57 * written bytes, older bytes are in (s1,l1). Save as much
@@ -71,15 +65,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
71 unsigned long s1_start, s2_start; 65 unsigned long s1_start, s2_start;
72 unsigned long l1_cpy, l2_cpy; 66 unsigned long l1_cpy, l2_cpy;
73 unsigned long size, total = 0; 67 unsigned long size, total = 0;
74 char *dst; 68 char *dst, *why;
75 u64 id; 69 u64 id;
76 int hsize, part = 1; 70 int hsize, part = 1;
77 71
72 if (reason < ARRAY_SIZE(reason_str))
73 why = reason_str[reason];
74 else
75 why = "Unknown";
76
78 mutex_lock(&psinfo->buf_mutex); 77 mutex_lock(&psinfo->buf_mutex);
79 oopscount++; 78 oopscount++;
80 while (total < kmsg_bytes) { 79 while (total < kmsg_bytes) {
81 dst = psinfo->buf; 80 dst = psinfo->buf;
82 hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++); 81 hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++);
83 size = psinfo->bufsize - hsize; 82 size = psinfo->bufsize - hsize;
84 dst += hsize; 83 dst += hsize;
85 84
@@ -96,7 +95,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
96 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); 95 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
97 96
98 id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); 97 id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy);
99 if (pstore_is_mounted()) 98 if (reason == KMSG_DUMP_OOPS && pstore_is_mounted())
100 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, 99 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
101 psinfo->buf, hsize + l1_cpy + l2_cpy, 100 psinfo->buf, hsize + l1_cpy + l2_cpy,
102 CURRENT_TIME, psinfo->erase); 101 CURRENT_TIME, psinfo->erase);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index e63b4171d583..2b0646613f5a 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -335,7 +335,6 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
335static const struct address_space_operations qnx4_aops = { 335static const struct address_space_operations qnx4_aops = {
336 .readpage = qnx4_readpage, 336 .readpage = qnx4_readpage,
337 .writepage = qnx4_writepage, 337 .writepage = qnx4_writepage,
338 .sync_page = block_sync_page,
339 .write_begin = qnx4_write_begin, 338 .write_begin = qnx4_write_begin,
340 .write_end = generic_write_end, 339 .write_end = generic_write_end,
341 .bmap = qnx4_bmap 340 .bmap = qnx4_bmap
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a2a622e079f0..fcc8ae75d874 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -76,7 +76,7 @@
76#include <linux/buffer_head.h> 76#include <linux/buffer_head.h>
77#include <linux/capability.h> 77#include <linux/capability.h>
78#include <linux/quotaops.h> 78#include <linux/quotaops.h>
79#include <linux/writeback.h> /* for inode_lock, oddly enough.. */ 79#include "../internal.h" /* ugh */
80 80
81#include <asm/uaccess.h> 81#include <asm/uaccess.h>
82 82
@@ -900,33 +900,38 @@ static void add_dquot_ref(struct super_block *sb, int type)
900 int reserved = 0; 900 int reserved = 0;
901#endif 901#endif
902 902
903 spin_lock(&inode_lock); 903 spin_lock(&inode_sb_list_lock);
904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 904 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
905 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) 905 spin_lock(&inode->i_lock);
906 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
907 !atomic_read(&inode->i_writecount) ||
908 !dqinit_needed(inode, type)) {
909 spin_unlock(&inode->i_lock);
906 continue; 910 continue;
911 }
907#ifdef CONFIG_QUOTA_DEBUG 912#ifdef CONFIG_QUOTA_DEBUG
908 if (unlikely(inode_get_rsv_space(inode) > 0)) 913 if (unlikely(inode_get_rsv_space(inode) > 0))
909 reserved = 1; 914 reserved = 1;
910#endif 915#endif
911 if (!atomic_read(&inode->i_writecount))
912 continue;
913 if (!dqinit_needed(inode, type))
914 continue;
915
916 __iget(inode); 916 __iget(inode);
917 spin_unlock(&inode_lock); 917 spin_unlock(&inode->i_lock);
918 spin_unlock(&inode_sb_list_lock);
918 919
919 iput(old_inode); 920 iput(old_inode);
920 __dquot_initialize(inode, type); 921 __dquot_initialize(inode, type);
921 /* We hold a reference to 'inode' so it couldn't have been 922
922 * removed from s_inodes list while we dropped the inode_lock. 923 /*
923 * We cannot iput the inode now as we can be holding the last 924 * We hold a reference to 'inode' so it couldn't have been
924 * reference and we cannot iput it under inode_lock. So we 925 * removed from s_inodes list while we dropped the
925 * keep the reference and iput it later. */ 926 * inode_sb_list_lock We cannot iput the inode now as we can be
927 * holding the last reference and we cannot iput it under
928 * inode_sb_list_lock. So we keep the reference and iput it
929 * later.
930 */
926 old_inode = inode; 931 old_inode = inode;
927 spin_lock(&inode_lock); 932 spin_lock(&inode_sb_list_lock);
928 } 933 }
929 spin_unlock(&inode_lock); 934 spin_unlock(&inode_sb_list_lock);
930 iput(old_inode); 935 iput(old_inode);
931 936
932#ifdef CONFIG_QUOTA_DEBUG 937#ifdef CONFIG_QUOTA_DEBUG
@@ -1007,7 +1012,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
1007 struct inode *inode; 1012 struct inode *inode;
1008 int reserved = 0; 1013 int reserved = 0;
1009 1014
1010 spin_lock(&inode_lock); 1015 spin_lock(&inode_sb_list_lock);
1011 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1016 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1012 /* 1017 /*
1013 * We have to scan also I_NEW inodes because they can already 1018 * We have to scan also I_NEW inodes because they can already
@@ -1021,7 +1026,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
1021 remove_inode_dquot_ref(inode, type, tofree_head); 1026 remove_inode_dquot_ref(inode, type, tofree_head);
1022 } 1027 }
1023 } 1028 }
1024 spin_unlock(&inode_lock); 1029 spin_unlock(&inode_sb_list_lock);
1025#ifdef CONFIG_QUOTA_DEBUG 1030#ifdef CONFIG_QUOTA_DEBUG
1026 if (reserved) { 1031 if (reserved) {
1027 printk(KERN_WARNING "VFS (%s): Writes happened after quota" 1032 printk(KERN_WARNING "VFS (%s): Writes happened after quota"
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 792b3cb2cd18..3c3b00165114 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -31,9 +31,7 @@ endif
31# and causing a panic. Since this behavior only affects ppc32, this ifeq 31# and causing a panic. Since this behavior only affects ppc32, this ifeq
32# will work around it. If any other architecture displays this behavior, 32# will work around it. If any other architecture displays this behavior,
33# add it here. 33# add it here.
34ifeq ($(CONFIG_PPC32),y) 34ccflags-$(CONFIG_PPC32) := $(call cc-ifversion, -lt, 0400, -O1)
35EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0400, -O1)
36endif
37 35
38TAGS: 36TAGS:
39 etags *.c 37 etags *.c
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 1bba24bad820..4fd5bb33dbb5 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3217,7 +3217,6 @@ const struct address_space_operations reiserfs_address_space_operations = {
3217 .readpages = reiserfs_readpages, 3217 .readpages = reiserfs_readpages,
3218 .releasepage = reiserfs_releasepage, 3218 .releasepage = reiserfs_releasepage,
3219 .invalidatepage = reiserfs_invalidatepage, 3219 .invalidatepage = reiserfs_invalidatepage,
3220 .sync_page = block_sync_page,
3221 .write_begin = reiserfs_write_begin, 3220 .write_begin = reiserfs_write_begin,
3222 .write_end = reiserfs_write_end, 3221 .write_end = reiserfs_write_end,
3223 .bmap = reiserfs_aop_bmap, 3222 .bmap = reiserfs_aop_bmap,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 79265fdc317a..4e153051bc75 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -59,7 +59,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
59 if (err) 59 if (err)
60 break; 60 break;
61 61
62 if (!is_owner_or_cap(inode)) { 62 if (!inode_owner_or_capable(inode)) {
63 err = -EPERM; 63 err = -EPERM;
64 goto setflags_out; 64 goto setflags_out;
65 } 65 }
@@ -103,7 +103,7 @@ setflags_out:
103 err = put_user(inode->i_generation, (int __user *)arg); 103 err = put_user(inode->i_generation, (int __user *)arg);
104 break; 104 break;
105 case REISERFS_IOC_SETVERSION: 105 case REISERFS_IOC_SETVERSION:
106 if (!is_owner_or_cap(inode)) { 106 if (!inode_owner_or_capable(inode)) {
107 err = -EPERM; 107 err = -EPERM;
108 break; 108 break;
109 } 109 }
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 90d2fcb67a31..3dc38f1206fc 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -26,7 +26,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value,
26 size_t jcreate_blocks; 26 size_t jcreate_blocks;
27 if (!reiserfs_posixacl(inode->i_sb)) 27 if (!reiserfs_posixacl(inode->i_sb))
28 return -EOPNOTSUPP; 28 return -EOPNOTSUPP;
29 if (!is_owner_or_cap(inode)) 29 if (!inode_owner_or_capable(inode))
30 return -EPERM; 30 return -EPERM;
31 31
32 if (value) { 32 if (value) {
diff --git a/fs/select.c b/fs/select.c
index e56560d2b08a..d33418fdc858 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -517,9 +517,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
517 * Update: ERESTARTSYS breaks at least the xview clock binary, so 517 * Update: ERESTARTSYS breaks at least the xview clock binary, so
518 * I'm trying ERESTARTNOHAND which restart only when you want to. 518 * I'm trying ERESTARTNOHAND which restart only when you want to.
519 */ 519 */
520#define MAX_SELECT_SECONDS \
521 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
522
523int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 520int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
524 fd_set __user *exp, struct timespec *end_time) 521 fd_set __user *exp, struct timespec *end_time)
525{ 522{
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index aa68a8a31518..efc309fa3035 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -5,12 +5,12 @@ config SQUASHFS
5 help 5 help
6 Saying Y here includes support for SquashFS 4.0 (a Compressed 6 Saying Y here includes support for SquashFS 4.0 (a Compressed
7 Read-Only File System). Squashfs is a highly compressed read-only 7 Read-Only File System). Squashfs is a highly compressed read-only
8 filesystem for Linux. It uses zlib/lzo compression to compress both 8 filesystem for Linux. It uses zlib, lzo or xz compression to
9 files, inodes and directories. Inodes in the system are very small 9 compress both files, inodes and directories. Inodes in the system
10 and all blocks are packed to minimise data overhead. Block sizes 10 are very small and all blocks are packed to minimise data overhead.
11 greater than 4K are supported up to a maximum of 1 Mbytes (default 11 Block sizes greater than 4K are supported up to a maximum of 1 Mbytes
12 block size 128K). SquashFS 4.0 supports 64 bit filesystems and files 12 (default block size 128K). SquashFS 4.0 supports 64 bit filesystems
13 (larger than 4GB), full uid/gid information, hard links and 13 and files (larger than 4GB), full uid/gid information, hard links and
14 timestamps. 14 timestamps.
15 15
16 Squashfs is intended for general read-only filesystem use, for 16 Squashfs is intended for general read-only filesystem use, for
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index a5940e54c4dd..e921bd213738 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -23,6 +23,7 @@
23 23
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <linux/slab.h>
26#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
27 28
28#include "squashfs_fs.h" 29#include "squashfs_fs.h"
@@ -74,3 +75,36 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
74 75
75 return decompressor[i]; 76 return decompressor[i];
76} 77}
78
79
80void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
81{
82 struct squashfs_sb_info *msblk = sb->s_fs_info;
83 void *strm, *buffer = NULL;
84 int length = 0;
85
86 /*
87 * Read decompressor specific options from file system if present
88 */
89 if (SQUASHFS_COMP_OPTS(flags)) {
90 buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
91 if (buffer == NULL)
92 return ERR_PTR(-ENOMEM);
93
94 length = squashfs_read_data(sb, &buffer,
95 sizeof(struct squashfs_super_block), 0, NULL,
96 PAGE_CACHE_SIZE, 1);
97
98 if (length < 0) {
99 strm = ERR_PTR(length);
100 goto finished;
101 }
102 }
103
104 strm = msblk->decompressor->init(msblk, buffer, length);
105
106finished:
107 kfree(buffer);
108
109 return strm;
110}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 3b305a70f7aa..099745ad5691 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26struct squashfs_decompressor { 26struct squashfs_decompressor {
27 void *(*init)(struct squashfs_sb_info *); 27 void *(*init)(struct squashfs_sb_info *, void *, int);
28 void (*free)(void *); 28 void (*free)(void *);
29 int (*decompress)(struct squashfs_sb_info *, void **, 29 int (*decompress)(struct squashfs_sb_info *, void **,
30 struct buffer_head **, int, int, int, int, int); 30 struct buffer_head **, int, int, int, int, int);
@@ -33,11 +33,6 @@ struct squashfs_decompressor {
33 int supported; 33 int supported;
34}; 34};
35 35
36static inline void *squashfs_decompressor_init(struct squashfs_sb_info *msblk)
37{
38 return msblk->decompressor->init(msblk);
39}
40
41static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, 36static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
42 void *s) 37 void *s)
43{ 38{
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 0dc340aa2be9..3f79cd1d0c19 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -172,6 +172,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
172 length += sizeof(dirh); 172 length += sizeof(dirh);
173 173
174 dir_count = le32_to_cpu(dirh.count) + 1; 174 dir_count = le32_to_cpu(dirh.count) + 1;
175
176 /* dir_count should never be larger than 256 */
177 if (dir_count > 256)
178 goto failed_read;
179
175 while (dir_count--) { 180 while (dir_count--) {
176 /* 181 /*
177 * Read directory entry. 182 * Read directory entry.
@@ -183,6 +188,10 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
183 188
184 size = le16_to_cpu(dire->size) + 1; 189 size = le16_to_cpu(dire->size) + 1;
185 190
191 /* size should never be larger than SQUASHFS_NAME_LEN */
192 if (size > SQUASHFS_NAME_LEN)
193 goto failed_read;
194
186 err = squashfs_read_metadata(inode->i_sb, dire->name, 195 err = squashfs_read_metadata(inode->i_sb, dire->name,
187 &block, &offset, size); 196 &block, &offset, size);
188 if (err < 0) 197 if (err < 0)
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 7da759e34c52..00f4dfc5f088 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -37,7 +37,7 @@ struct squashfs_lzo {
37 void *output; 37 void *output;
38}; 38};
39 39
40static void *lzo_init(struct squashfs_sb_info *msblk) 40static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
41{ 41{
42 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); 42 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
43 43
@@ -58,7 +58,7 @@ failed2:
58failed: 58failed:
59 ERROR("Failed to allocate lzo workspace\n"); 59 ERROR("Failed to allocate lzo workspace\n");
60 kfree(stream); 60 kfree(stream);
61 return NULL; 61 return ERR_PTR(-ENOMEM);
62} 62}
63 63
64 64
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 7a9464d08cf6..5d922a6701ab 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -176,6 +176,11 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
176 length += sizeof(dirh); 176 length += sizeof(dirh);
177 177
178 dir_count = le32_to_cpu(dirh.count) + 1; 178 dir_count = le32_to_cpu(dirh.count) + 1;
179
180 /* dir_count should never be larger than 256 */
181 if (dir_count > 256)
182 goto data_error;
183
179 while (dir_count--) { 184 while (dir_count--) {
180 /* 185 /*
181 * Read directory entry. 186 * Read directory entry.
@@ -187,6 +192,10 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
187 192
188 size = le16_to_cpu(dire->size) + 1; 193 size = le16_to_cpu(dire->size) + 1;
189 194
195 /* size should never be larger than SQUASHFS_NAME_LEN */
196 if (size > SQUASHFS_NAME_LEN)
197 goto data_error;
198
190 err = squashfs_read_metadata(dir->i_sb, dire->name, 199 err = squashfs_read_metadata(dir->i_sb, dire->name,
191 &block, &offset, size); 200 &block, &offset, size);
192 if (err < 0) 201 if (err < 0)
@@ -228,6 +237,9 @@ exit_lookup:
228 d_add(dentry, inode); 237 d_add(dentry, inode);
229 return ERR_PTR(0); 238 return ERR_PTR(0);
230 239
240data_error:
241 err = -EIO;
242
231read_failure: 243read_failure:
232 ERROR("Unable to read directory block [%llx:%x]\n", 244 ERROR("Unable to read directory block [%llx:%x]\n",
233 squashfs_i(dir)->start + msblk->directory_table, 245 squashfs_i(dir)->start + msblk->directory_table,
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index ba729d808876..1f2e608b8785 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -48,6 +48,7 @@ extern int squashfs_read_table(struct super_block *, void *, u64, int);
48 48
49/* decompressor.c */ 49/* decompressor.c */
50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); 50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
51extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
51 52
52/* export.c */ 53/* export.c */
53extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, 54extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64,
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 39533feffd6d..4582c568ef4d 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -57,6 +57,7 @@
57#define SQUASHFS_ALWAYS_FRAG 5 57#define SQUASHFS_ALWAYS_FRAG 5
58#define SQUASHFS_DUPLICATE 6 58#define SQUASHFS_DUPLICATE 6
59#define SQUASHFS_EXPORT 7 59#define SQUASHFS_EXPORT 7
60#define SQUASHFS_COMP_OPT 10
60 61
61#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) 62#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1)
62 63
@@ -81,6 +82,9 @@
81#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ 82#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \
82 SQUASHFS_EXPORT) 83 SQUASHFS_EXPORT)
83 84
85#define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \
86 SQUASHFS_COMP_OPT)
87
84/* Max number of types and file types */ 88/* Max number of types and file types */
85#define SQUASHFS_DIR_TYPE 1 89#define SQUASHFS_DIR_TYPE 1
86#define SQUASHFS_REG_TYPE 2 90#define SQUASHFS_REG_TYPE 2
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 20700b9f2b4c..5c8184c061a4 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -199,10 +199,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
199 199
200 err = -ENOMEM; 200 err = -ENOMEM;
201 201
202 msblk->stream = squashfs_decompressor_init(msblk);
203 if (msblk->stream == NULL)
204 goto failed_mount;
205
206 msblk->block_cache = squashfs_cache_init("metadata", 202 msblk->block_cache = squashfs_cache_init("metadata",
207 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); 203 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE);
208 if (msblk->block_cache == NULL) 204 if (msblk->block_cache == NULL)
@@ -215,6 +211,13 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
215 goto failed_mount; 211 goto failed_mount;
216 } 212 }
217 213
214 msblk->stream = squashfs_decompressor_init(sb, flags);
215 if (IS_ERR(msblk->stream)) {
216 err = PTR_ERR(msblk->stream);
217 msblk->stream = NULL;
218 goto failed_mount;
219 }
220
218 /* Allocate and read id index table */ 221 /* Allocate and read id index table */
219 msblk->id_table = squashfs_read_id_index_table(sb, 222 msblk->id_table = squashfs_read_id_index_table(sb,
220 le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids)); 223 le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids));
@@ -370,8 +373,8 @@ static void squashfs_put_super(struct super_block *sb)
370} 373}
371 374
372 375
373static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags, 376static struct dentry *squashfs_mount(struct file_system_type *fs_type,
374 const char *dev_name, void *data) 377 int flags, const char *dev_name, void *data)
375{ 378{
376 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); 379 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
377} 380}
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c4eb40018256..aa47a286d1f8 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -26,10 +26,10 @@
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/xz.h> 28#include <linux/xz.h>
29#include <linux/bitops.h>
29 30
30#include "squashfs_fs.h" 31#include "squashfs_fs.h"
31#include "squashfs_fs_sb.h" 32#include "squashfs_fs_sb.h"
32#include "squashfs_fs_i.h"
33#include "squashfs.h" 33#include "squashfs.h"
34#include "decompressor.h" 34#include "decompressor.h"
35 35
@@ -38,24 +38,57 @@ struct squashfs_xz {
38 struct xz_buf buf; 38 struct xz_buf buf;
39}; 39};
40 40
41static void *squashfs_xz_init(struct squashfs_sb_info *msblk) 41struct comp_opts {
42 __le32 dictionary_size;
43 __le32 flags;
44};
45
46static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
47 int len)
42{ 48{
43 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); 49 struct comp_opts *comp_opts = buff;
50 struct squashfs_xz *stream;
51 int dict_size = msblk->block_size;
52 int err, n;
53
54 if (comp_opts) {
55 /* check compressor options are the expected length */
56 if (len < sizeof(*comp_opts)) {
57 err = -EIO;
58 goto failed;
59 }
44 60
45 struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL); 61 dict_size = le32_to_cpu(comp_opts->dictionary_size);
46 if (stream == NULL) 62
63 /* the dictionary size should be 2^n or 2^n+2^(n+1) */
64 n = ffs(dict_size) - 1;
65 if (dict_size != (1 << n) && dict_size != (1 << n) +
66 (1 << (n + 1))) {
67 err = -EIO;
68 goto failed;
69 }
70 }
71
72 dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
73
74 stream = kmalloc(sizeof(*stream), GFP_KERNEL);
75 if (stream == NULL) {
76 err = -ENOMEM;
47 goto failed; 77 goto failed;
78 }
48 79
49 stream->state = xz_dec_init(XZ_PREALLOC, block_size); 80 stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
50 if (stream->state == NULL) 81 if (stream->state == NULL) {
82 kfree(stream);
83 err = -ENOMEM;
51 goto failed; 84 goto failed;
85 }
52 86
53 return stream; 87 return stream;
54 88
55failed: 89failed:
56 ERROR("Failed to allocate xz workspace\n"); 90 ERROR("Failed to initialise xz decompressor\n");
57 kfree(stream); 91 return ERR_PTR(err);
58 return NULL;
59} 92}
60 93
61 94
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 4661ae2b1cec..517688b32ffa 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -26,19 +26,19 @@
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/zlib.h> 28#include <linux/zlib.h>
29#include <linux/vmalloc.h>
29 30
30#include "squashfs_fs.h" 31#include "squashfs_fs.h"
31#include "squashfs_fs_sb.h" 32#include "squashfs_fs_sb.h"
32#include "squashfs.h" 33#include "squashfs.h"
33#include "decompressor.h" 34#include "decompressor.h"
34 35
35static void *zlib_init(struct squashfs_sb_info *dummy) 36static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
36{ 37{
37 z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); 38 z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
38 if (stream == NULL) 39 if (stream == NULL)
39 goto failed; 40 goto failed;
40 stream->workspace = kmalloc(zlib_inflate_workspacesize(), 41 stream->workspace = vmalloc(zlib_inflate_workspacesize());
41 GFP_KERNEL);
42 if (stream->workspace == NULL) 42 if (stream->workspace == NULL)
43 goto failed; 43 goto failed;
44 44
@@ -47,7 +47,7 @@ static void *zlib_init(struct squashfs_sb_info *dummy)
47failed: 47failed:
48 ERROR("Failed to allocate zlib workspace\n"); 48 ERROR("Failed to allocate zlib workspace\n");
49 kfree(stream); 49 kfree(stream);
50 return NULL; 50 return ERR_PTR(-ENOMEM);
51} 51}
52 52
53 53
@@ -56,7 +56,7 @@ static void zlib_free(void *strm)
56 z_stream *stream = strm; 56 z_stream *stream = strm;
57 57
58 if (stream) 58 if (stream)
59 kfree(stream->workspace); 59 vfree(stream->workspace);
60 kfree(stream); 60 kfree(stream);
61} 61}
62 62
diff --git a/fs/super.c b/fs/super.c
index e84864908264..8a06881b1920 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
71#else 71#else
72 INIT_LIST_HEAD(&s->s_files); 72 INIT_LIST_HEAD(&s->s_files);
73#endif 73#endif
74 s->s_bdi = &default_backing_dev_info;
74 INIT_LIST_HEAD(&s->s_instances); 75 INIT_LIST_HEAD(&s->s_instances);
75 INIT_HLIST_BL_HEAD(&s->s_anon); 76 INIT_HLIST_BL_HEAD(&s->s_anon);
76 INIT_LIST_HEAD(&s->s_inodes); 77 INIT_LIST_HEAD(&s->s_inodes);
@@ -936,6 +937,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
936 sb = root->d_sb; 937 sb = root->d_sb;
937 BUG_ON(!sb); 938 BUG_ON(!sb);
938 WARN_ON(!sb->s_bdi); 939 WARN_ON(!sb->s_bdi);
940 WARN_ON(sb->s_bdi == &default_backing_dev_info);
939 sb->s_flags |= MS_BORN; 941 sb->s_flags |= MS_BORN;
940 942
941 error = security_sb_kern_mount(sb, flags, secdata); 943 error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/sync.c b/fs/sync.c
index ba76b9623e7e..c38ec163da6c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -7,6 +7,7 @@
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/namei.h>
10#include <linux/sched.h> 11#include <linux/sched.h>
11#include <linux/writeback.h> 12#include <linux/writeback.h>
12#include <linux/syscalls.h> 13#include <linux/syscalls.h>
@@ -33,7 +34,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
33 * This should be safe, as we require bdi backing to actually 34 * This should be safe, as we require bdi backing to actually
34 * write out data in the first place 35 * write out data in the first place
35 */ 36 */
36 if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info) 37 if (sb->s_bdi == &noop_backing_dev_info)
37 return 0; 38 return 0;
38 39
39 if (sb->s_qcop && sb->s_qcop->quota_sync) 40 if (sb->s_qcop && sb->s_qcop->quota_sync)
@@ -79,7 +80,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
79 80
80static void sync_one_sb(struct super_block *sb, void *arg) 81static void sync_one_sb(struct super_block *sb, void *arg)
81{ 82{
82 if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi) 83 if (!(sb->s_flags & MS_RDONLY))
83 __sync_filesystem(sb, *(int *)arg); 84 __sync_filesystem(sb, *(int *)arg);
84} 85}
85/* 86/*
@@ -128,6 +129,29 @@ void emergency_sync(void)
128 } 129 }
129} 130}
130 131
132/*
133 * sync a single super
134 */
135SYSCALL_DEFINE1(syncfs, int, fd)
136{
137 struct file *file;
138 struct super_block *sb;
139 int ret;
140 int fput_needed;
141
142 file = fget_light(fd, &fput_needed);
143 if (!file)
144 return -EBADF;
145 sb = file->f_dentry->d_sb;
146
147 down_read(&sb->s_umount);
148 ret = sync_filesystem(sb);
149 up_read(&sb->s_umount);
150
151 fput_light(file, fput_needed);
152 return ret;
153}
154
131/** 155/**
132 * vfs_fsync_range - helper to sync a range of data & metadata to disk 156 * vfs_fsync_range - helper to sync a range of data & metadata to disk
133 * @file: file to sync 157 * @file: file to sync
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 9ca66276315e..fa8d43c92bb8 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -488,7 +488,6 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
488const struct address_space_operations sysv_aops = { 488const struct address_space_operations sysv_aops = {
489 .readpage = sysv_readpage, 489 .readpage = sysv_readpage,
490 .writepage = sysv_writepage, 490 .writepage = sysv_writepage,
491 .sync_page = block_sync_page,
492 .write_begin = sysv_write_begin, 491 .write_begin = sysv_write_begin,
493 .write_end = generic_write_end, 492 .write_end = generic_write_end,
494 .bmap = sysv_bmap 493 .bmap = sysv_bmap
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 1d1859dc3de5..d7440904be17 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -58,12 +58,3 @@ config UBIFS_FS_DEBUG
58 down UBIFS. You can then further enable / disable individual debugging 58 down UBIFS. You can then further enable / disable individual debugging
59 features using UBIFS module parameters and the corresponding sysfs 59 features using UBIFS module parameters and the corresponding sysfs
60 interfaces. 60 interfaces.
61
62config UBIFS_FS_DEBUG_CHKS
63 bool "Enable extra checks"
64 depends on UBIFS_FS_DEBUG
65 help
66 If extra checks are enabled UBIFS will check the consistency of its
67 internal data structures during operation. However, UBIFS performance
68 is dramatically slower when this option is selected especially if the
69 file system is large.
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 01c2b028e525..f25a7339f800 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -818,7 +818,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
818 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 818 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
819 current->pid, lnum); 819 current->pid, lnum);
820 820
821 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 821 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
822 if (!buf) { 822 if (!buf) {
823 ubifs_err("cannot allocate memory for dumping LEB %d", lnum); 823 ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
824 return; 824 return;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index d77db7e36484..28be1e6a65e8 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -448,10 +448,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
448 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { 448 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
449 /* 449 /*
450 * We change whole page so no need to load it. But we 450 * We change whole page so no need to load it. But we
451 * have to set the @PG_checked flag to make the further 451 * do not know whether this page exists on the media or
452 * code know that the page is new. This might be not 452 * not, so we assume the latter because it requires
453 * true, but it is better to budget more than to read 453 * larger budget. The assumption is that it is better
454 * the page from the media. 454 * to budget a bit more than to read the page from the
455 * media. Thus, we are setting the @PG_checked flag
456 * here.
455 */ 457 */
456 SetPageChecked(page); 458 SetPageChecked(page);
457 skipped_read = 1; 459 skipped_read = 1;
@@ -559,6 +561,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
559 dbg_gen("copied %d instead of %d, read page and repeat", 561 dbg_gen("copied %d instead of %d, read page and repeat",
560 copied, len); 562 copied, len);
561 cancel_budget(c, page, ui, appending); 563 cancel_budget(c, page, ui, appending);
564 ClearPageChecked(page);
562 565
563 /* 566 /*
564 * Return 0 to force VFS to repeat the whole operation, or the 567 * Return 0 to force VFS to repeat the whole operation, or the
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 8aacd64957a2..548acf494afd 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -160,7 +160,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
160 if (IS_RDONLY(inode)) 160 if (IS_RDONLY(inode))
161 return -EROFS; 161 return -EROFS;
162 162
163 if (!is_owner_or_cap(inode)) 163 if (!inode_owner_or_capable(inode))
164 return -EACCES; 164 return -EACCES;
165 165
166 if (get_user(flags, (int __user *) arg)) 166 if (get_user(flags, (int __user *) arg))
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index c7b25e2f7764..0ee0847f2421 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1094,7 +1094,7 @@ static int scan_check_cb(struct ubifs_info *c,
1094 } 1094 }
1095 } 1095 }
1096 1096
1097 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 1097 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1098 if (!buf) { 1098 if (!buf) {
1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum); 1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum);
1100 goto out; 1100 goto out;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 0a3c2c3f5c4a..0c9c69bd983a 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1633,7 +1633,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1634 return 0; 1634 return 0;
1635 1635
1636 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 1636 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1637 if (!buf) { 1637 if (!buf) {
1638 ubifs_err("cannot allocate memory for ltab checking"); 1638 ubifs_err("cannot allocate memory for ltab checking");
1639 return 0; 1639 return 0;
@@ -1885,7 +1885,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1885 1885
1886 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 1886 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
1887 current->pid, lnum); 1887 current->pid, lnum);
1888 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 1888 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1889 if (!buf) { 1889 if (!buf) {
1890 ubifs_err("cannot allocate memory to dump LPT"); 1890 ubifs_err("cannot allocate memory to dump LPT");
1891 return; 1891 return;
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 2cdbd31641d7..09df318e368f 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -898,7 +898,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
898 if (c->no_orphs) 898 if (c->no_orphs)
899 return 0; 899 return 0;
900 900
901 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); 901 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
902 if (!buf) { 902 if (!buf) {
903 ubifs_err("cannot allocate memory to check orphans"); 903 ubifs_err("cannot allocate memory to check orphans");
904 return 0; 904 return 0;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index e5dc1e120e8d..6ddd9973e681 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2011,7 +2011,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
2011 */ 2011 */
2012 c->bdi.name = "ubifs", 2012 c->bdi.name = "ubifs",
2013 c->bdi.capabilities = BDI_CAP_MAP_COPY; 2013 c->bdi.capabilities = BDI_CAP_MAP_COPY;
2014 c->bdi.unplug_io_fn = default_unplug_io_fn;
2015 err = bdi_init(&c->bdi); 2014 err = bdi_init(&c->bdi);
2016 if (err) 2015 if (err)
2017 goto out_close; 2016 goto out_close;
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 8994dd041660..95518a9f589e 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -27,11 +27,10 @@
27#include "udf_i.h" 27#include "udf_i.h"
28#include "udf_sb.h" 28#include "udf_sb.h"
29 29
30#define udf_clear_bit(nr, addr) ext2_clear_bit(nr, addr) 30#define udf_clear_bit __test_and_clear_bit_le
31#define udf_set_bit(nr, addr) ext2_set_bit(nr, addr) 31#define udf_set_bit __test_and_set_bit_le
32#define udf_test_bit(nr, addr) ext2_test_bit(nr, addr) 32#define udf_test_bit test_bit_le
33#define udf_find_next_one_bit(addr, size, offset) \ 33#define udf_find_next_one_bit find_next_bit_le
34 ext2_find_next_bit((unsigned long *)(addr), size, offset)
35 34
36static int read_block_bitmap(struct super_block *sb, 35static int read_block_bitmap(struct super_block *sb,
37 struct udf_bitmap *bitmap, unsigned int block, 36 struct udf_bitmap *bitmap, unsigned int block,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index f391a2adc699..2a346bb1d9f5 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -98,7 +98,6 @@ static int udf_adinicb_write_end(struct file *file,
98const struct address_space_operations udf_adinicb_aops = { 98const struct address_space_operations udf_adinicb_aops = {
99 .readpage = udf_adinicb_readpage, 99 .readpage = udf_adinicb_readpage,
100 .writepage = udf_adinicb_writepage, 100 .writepage = udf_adinicb_writepage,
101 .sync_page = block_sync_page,
102 .write_begin = simple_write_begin, 101 .write_begin = simple_write_begin,
103 .write_end = udf_adinicb_write_end, 102 .write_end = udf_adinicb_write_end,
104}; 103};
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ccc814321414..1d1358ed80c1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -140,7 +140,6 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
140const struct address_space_operations udf_aops = { 140const struct address_space_operations udf_aops = {
141 .readpage = udf_readpage, 141 .readpage = udf_readpage,
142 .writepage = udf_writepage, 142 .writepage = udf_writepage,
143 .sync_page = block_sync_page,
144 .write_begin = udf_write_begin, 143 .write_begin = udf_write_begin,
145 .write_end = generic_write_end, 144 .write_end = generic_write_end,
146 .bmap = udf_bmap, 145 .bmap = udf_bmap,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 03c255f12df5..27a4babe7df0 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -552,7 +552,6 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
552const struct address_space_operations ufs_aops = { 552const struct address_space_operations ufs_aops = {
553 .readpage = ufs_readpage, 553 .readpage = ufs_readpage,
554 .writepage = ufs_writepage, 554 .writepage = ufs_writepage,
555 .sync_page = block_sync_page,
556 .write_begin = ufs_write_begin, 555 .write_begin = ufs_write_begin,
557 .write_end = generic_write_end, 556 .write_end = generic_write_end,
558 .bmap = ufs_bmap 557 .bmap = ufs_bmap
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index e56a4f567212..11014302c9ca 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -479,7 +479,7 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
479 break; 479 break;
480 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) 480 if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
481 ufs_sync_inode (inode); 481 ufs_sync_inode (inode);
482 blk_run_address_space(inode->i_mapping); 482 blk_flush_plug(current);
483 yield(); 483 yield();
484 } 484 }
485 485
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 9f8775ce381c..954175928240 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -408,7 +408,7 @@ static inline unsigned _ubh_find_next_zero_bit_(
408 for (;;) { 408 for (;;) {
409 count = min_t(unsigned int, size + offset, uspi->s_bpf); 409 count = min_t(unsigned int, size + offset, uspi->s_bpf);
410 size -= count - offset; 410 size -= count - offset;
411 pos = ext2_find_next_zero_bit (ubh->bh[base]->b_data, count, offset); 411 pos = find_next_zero_bit_le(ubh->bh[base]->b_data, count, offset);
412 if (pos < count || !size) 412 if (pos < count || !size)
413 break; 413 break;
414 base++; 414 base++;
diff --git a/fs/utimes.c b/fs/utimes.c
index 179b58690657..ba653f3dc1bc 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -95,7 +95,7 @@ static int utimes_common(struct path *path, struct timespec *times)
95 if (IS_IMMUTABLE(inode)) 95 if (IS_IMMUTABLE(inode))
96 goto mnt_drop_write_and_out; 96 goto mnt_drop_write_and_out;
97 97
98 if (!is_owner_or_cap(inode)) { 98 if (!inode_owner_or_capable(inode)) {
99 error = inode_permission(inode, MAY_WRITE); 99 error = inode_permission(inode, MAY_WRITE);
100 if (error) 100 if (error)
101 goto mnt_drop_write_and_out; 101 goto mnt_drop_write_and_out;
diff --git a/fs/xattr.c b/fs/xattr.c
index 01bb8135e14a..a19acdb81cd1 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -59,7 +59,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
59 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 59 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
60 return -EPERM; 60 return -EPERM;
61 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && 61 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
62 (mask & MAY_WRITE) && !is_owner_or_cap(inode)) 62 (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
63 return -EPERM; 63 return -EPERM;
64 } 64 }
65 65
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index faca44997099..284a7c89697e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,14 +16,11 @@
16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17# 17#
18 18
19EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 19ccflags-y := -I$(src) -I$(src)/linux-2.6
20ccflags-$(CONFIG_XFS_DEBUG) += -g
20 21
21XFS_LINUX := linux-2.6 22XFS_LINUX := linux-2.6
22 23
23ifeq ($(CONFIG_XFS_DEBUG),y)
24 EXTRA_CFLAGS += -g
25endif
26
27obj-$(CONFIG_XFS_FS) += xfs.o 24obj-$(CONFIG_XFS_FS) += xfs.o
28 25
29xfs-y += linux-2.6/xfs_trace.o 26xfs-y += linux-2.6/xfs_trace.o
@@ -105,11 +102,10 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
105 xfs_globals.o \ 102 xfs_globals.o \
106 xfs_ioctl.o \ 103 xfs_ioctl.o \
107 xfs_iops.o \ 104 xfs_iops.o \
105 xfs_message.o \
108 xfs_super.o \ 106 xfs_super.o \
109 xfs_sync.o \ 107 xfs_sync.o \
110 xfs_xattr.o) 108 xfs_xattr.o)
111 109
112# Objects in support/ 110# Objects in support/
113xfs-y += $(addprefix support/, \ 111xfs-y += support/uuid.o
114 debug.o \
115 uuid.o)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 666c9db48eb6..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -23,6 +23,7 @@
23#include <linux/backing-dev.h> 23#include <linux/backing-dev.h>
24#include "time.h" 24#include "time.h"
25#include "kmem.h" 25#include "kmem.h"
26#include "xfs_message.h"
26 27
27/* 28/*
28 * Greedy allocation. May fail and may return vmalloced memory. 29 * Greedy allocation. May fail and may return vmalloced memory.
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
56 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) 57 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
57 return ptr; 58 return ptr;
58 if (!(++retries % 100)) 59 if (!(++retries % 100))
59 printk(KERN_ERR "XFS: possible memory allocation " 60 xfs_err(NULL,
60 "deadlock in %s (mode:0x%x)\n", 61 "possible memory allocation deadlock in %s (mode:0x%x)",
61 __func__, lflags); 62 __func__, lflags);
62 congestion_wait(BLK_RW_ASYNC, HZ/50); 63 congestion_wait(BLK_RW_ASYNC, HZ/50);
63 } while (1); 64 } while (1);
@@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
112 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) 113 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
113 return ptr; 114 return ptr;
114 if (!(++retries % 100)) 115 if (!(++retries % 100))
115 printk(KERN_ERR "XFS: possible memory allocation " 116 xfs_err(NULL,
116 "deadlock in %s (mode:0x%x)\n", 117 "possible memory allocation deadlock in %s (mode:0x%x)",
117 __func__, lflags); 118 __func__, lflags);
118 congestion_wait(BLK_RW_ASYNC, HZ/50); 119 congestion_wait(BLK_RW_ASYNC, HZ/50);
119 } while (1); 120 } while (1);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index ec7bbb5645b6..52dbd14260ba 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -413,8 +413,7 @@ xfs_submit_ioend_bio(
413 if (xfs_ioend_new_eof(ioend)) 413 if (xfs_ioend_new_eof(ioend))
414 xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); 414 xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
415 415
416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 416 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
417 WRITE_SYNC_PLUG : WRITE, bio);
418} 417}
419 418
420STATIC struct bio * 419STATIC struct bio *
@@ -854,7 +853,7 @@ xfs_aops_discard_page(
854 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 853 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
855 goto out_invalidate; 854 goto out_invalidate;
856 855
857 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 856 xfs_alert(ip->i_mount,
858 "page discard on page %p, inode 0x%llx, offset %llu.", 857 "page discard on page %p, inode 0x%llx, offset %llu.",
859 page, ip->i_ino, offset); 858 page, ip->i_ino, offset);
860 859
@@ -872,7 +871,7 @@ xfs_aops_discard_page(
872 if (error) { 871 if (error) {
873 /* something screwed, just bail */ 872 /* something screwed, just bail */
874 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 873 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
875 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 874 xfs_alert(ip->i_mount,
876 "page discard unable to remove delalloc mapping."); 875 "page discard unable to remove delalloc mapping.");
877 } 876 }
878 break; 877 break;
@@ -1411,7 +1410,7 @@ xfs_vm_write_failed(
1411 if (error) { 1410 if (error) {
1412 /* something screwed, just bail */ 1411 /* something screwed, just bail */
1413 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1412 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1414 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 1413 xfs_alert(ip->i_mount,
1415 "xfs_vm_write_failed: unable to clean up ino %lld", 1414 "xfs_vm_write_failed: unable to clean up ino %lld",
1416 ip->i_ino); 1415 ip->i_ino);
1417 } 1416 }
@@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = {
1495 .readpages = xfs_vm_readpages, 1494 .readpages = xfs_vm_readpages,
1496 .writepage = xfs_vm_writepage, 1495 .writepage = xfs_vm_writepage,
1497 .writepages = xfs_vm_writepages, 1496 .writepages = xfs_vm_writepages,
1498 .sync_page = block_sync_page,
1499 .releasepage = xfs_vm_releasepage, 1497 .releasepage = xfs_vm_releasepage,
1500 .invalidatepage = xfs_vm_invalidatepage, 1498 .invalidatepage = xfs_vm_invalidatepage,
1501 .write_begin = xfs_vm_write_begin, 1499 .write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index f83a4c830a65..596bb2c9de42 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -94,75 +94,6 @@ xfs_buf_vmap_len(
94} 94}
95 95
96/* 96/*
97 * Page Region interfaces.
98 *
99 * For pages in filesystems where the blocksize is smaller than the
100 * pagesize, we use the page->private field (long) to hold a bitmap
101 * of uptodate regions within the page.
102 *
103 * Each such region is "bytes per page / bits per long" bytes long.
104 *
105 * NBPPR == number-of-bytes-per-page-region
106 * BTOPR == bytes-to-page-region (rounded up)
107 * BTOPRT == bytes-to-page-region-truncated (rounded down)
108 */
109#if (BITS_PER_LONG == 32)
110#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
111#elif (BITS_PER_LONG == 64)
112#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
113#else
114#error BITS_PER_LONG must be 32 or 64
115#endif
116#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
117#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
118#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
119
120STATIC unsigned long
121page_region_mask(
122 size_t offset,
123 size_t length)
124{
125 unsigned long mask;
126 int first, final;
127
128 first = BTOPR(offset);
129 final = BTOPRT(offset + length - 1);
130 first = min(first, final);
131
132 mask = ~0UL;
133 mask <<= BITS_PER_LONG - (final - first);
134 mask >>= BITS_PER_LONG - (final);
135
136 ASSERT(offset + length <= PAGE_CACHE_SIZE);
137 ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
138
139 return mask;
140}
141
142STATIC void
143set_page_region(
144 struct page *page,
145 size_t offset,
146 size_t length)
147{
148 set_page_private(page,
149 page_private(page) | page_region_mask(offset, length));
150 if (page_private(page) == ~0UL)
151 SetPageUptodate(page);
152}
153
154STATIC int
155test_page_region(
156 struct page *page,
157 size_t offset,
158 size_t length)
159{
160 unsigned long mask = page_region_mask(offset, length);
161
162 return (mask && (page_private(page) & mask) == mask);
163}
164
165/*
166 * xfs_buf_lru_add - add a buffer to the LRU. 97 * xfs_buf_lru_add - add a buffer to the LRU.
167 * 98 *
168 * The LRU takes a new reference to the buffer so that it will only be freed 99 * The LRU takes a new reference to the buffer so that it will only be freed
@@ -332,7 +263,7 @@ xfs_buf_free(
332 263
333 ASSERT(list_empty(&bp->b_lru)); 264 ASSERT(list_empty(&bp->b_lru));
334 265
335 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 266 if (bp->b_flags & _XBF_PAGES) {
336 uint i; 267 uint i;
337 268
338 if (xfs_buf_is_vmapped(bp)) 269 if (xfs_buf_is_vmapped(bp))
@@ -342,25 +273,22 @@ xfs_buf_free(
342 for (i = 0; i < bp->b_page_count; i++) { 273 for (i = 0; i < bp->b_page_count; i++) {
343 struct page *page = bp->b_pages[i]; 274 struct page *page = bp->b_pages[i];
344 275
345 if (bp->b_flags & _XBF_PAGE_CACHE) 276 __free_page(page);
346 ASSERT(!PagePrivate(page));
347 page_cache_release(page);
348 } 277 }
349 } 278 } else if (bp->b_flags & _XBF_KMEM)
279 kmem_free(bp->b_addr);
350 _xfs_buf_free_pages(bp); 280 _xfs_buf_free_pages(bp);
351 xfs_buf_deallocate(bp); 281 xfs_buf_deallocate(bp);
352} 282}
353 283
354/* 284/*
355 * Finds all pages for buffer in question and builds it's page list. 285 * Allocates all the pages for buffer in question and builds it's page list.
356 */ 286 */
357STATIC int 287STATIC int
358_xfs_buf_lookup_pages( 288xfs_buf_allocate_memory(
359 xfs_buf_t *bp, 289 xfs_buf_t *bp,
360 uint flags) 290 uint flags)
361{ 291{
362 struct address_space *mapping = bp->b_target->bt_mapping;
363 size_t blocksize = bp->b_target->bt_bsize;
364 size_t size = bp->b_count_desired; 292 size_t size = bp->b_count_desired;
365 size_t nbytes, offset; 293 size_t nbytes, offset;
366 gfp_t gfp_mask = xb_to_gfp(flags); 294 gfp_t gfp_mask = xb_to_gfp(flags);
@@ -369,29 +297,55 @@ _xfs_buf_lookup_pages(
369 xfs_off_t end; 297 xfs_off_t end;
370 int error; 298 int error;
371 299
300 /*
301 * for buffers that are contained within a single page, just allocate
302 * the memory from the heap - there's no need for the complexity of
303 * page arrays to keep allocation down to order 0.
304 */
305 if (bp->b_buffer_length < PAGE_SIZE) {
306 bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
307 if (!bp->b_addr) {
308 /* low memory - use alloc_page loop instead */
309 goto use_alloc_page;
310 }
311
312 if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
313 PAGE_MASK) !=
314 ((unsigned long)bp->b_addr & PAGE_MASK)) {
315 /* b_addr spans two pages - use alloc_page instead */
316 kmem_free(bp->b_addr);
317 bp->b_addr = NULL;
318 goto use_alloc_page;
319 }
320 bp->b_offset = offset_in_page(bp->b_addr);
321 bp->b_pages = bp->b_page_array;
322 bp->b_pages[0] = virt_to_page(bp->b_addr);
323 bp->b_page_count = 1;
324 bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
325 return 0;
326 }
327
328use_alloc_page:
372 end = bp->b_file_offset + bp->b_buffer_length; 329 end = bp->b_file_offset + bp->b_buffer_length;
373 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); 330 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
374
375 error = _xfs_buf_get_pages(bp, page_count, flags); 331 error = _xfs_buf_get_pages(bp, page_count, flags);
376 if (unlikely(error)) 332 if (unlikely(error))
377 return error; 333 return error;
378 bp->b_flags |= _XBF_PAGE_CACHE;
379 334
380 offset = bp->b_offset; 335 offset = bp->b_offset;
381 first = bp->b_file_offset >> PAGE_CACHE_SHIFT; 336 first = bp->b_file_offset >> PAGE_SHIFT;
337 bp->b_flags |= _XBF_PAGES;
382 338
383 for (i = 0; i < bp->b_page_count; i++) { 339 for (i = 0; i < bp->b_page_count; i++) {
384 struct page *page; 340 struct page *page;
385 uint retries = 0; 341 uint retries = 0;
386 342retry:
387 retry: 343 page = alloc_page(gfp_mask);
388 page = find_or_create_page(mapping, first + i, gfp_mask);
389 if (unlikely(page == NULL)) { 344 if (unlikely(page == NULL)) {
390 if (flags & XBF_READ_AHEAD) { 345 if (flags & XBF_READ_AHEAD) {
391 bp->b_page_count = i; 346 bp->b_page_count = i;
392 for (i = 0; i < bp->b_page_count; i++) 347 error = ENOMEM;
393 unlock_page(bp->b_pages[i]); 348 goto out_free_pages;
394 return -ENOMEM;
395 } 349 }
396 350
397 /* 351 /*
@@ -401,9 +355,8 @@ _xfs_buf_lookup_pages(
401 * handle buffer allocation failures we can't do much. 355 * handle buffer allocation failures we can't do much.
402 */ 356 */
403 if (!(++retries % 100)) 357 if (!(++retries % 100))
404 printk(KERN_ERR 358 xfs_err(NULL,
405 "XFS: possible memory allocation " 359 "possible memory allocation deadlock in %s (mode:0x%x)",
406 "deadlock in %s (mode:0x%x)\n",
407 __func__, gfp_mask); 360 __func__, gfp_mask);
408 361
409 XFS_STATS_INC(xb_page_retries); 362 XFS_STATS_INC(xb_page_retries);
@@ -413,33 +366,16 @@ _xfs_buf_lookup_pages(
413 366
414 XFS_STATS_INC(xb_page_found); 367 XFS_STATS_INC(xb_page_found);
415 368
416 nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); 369 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
417 size -= nbytes; 370 size -= nbytes;
418
419 ASSERT(!PagePrivate(page));
420 if (!PageUptodate(page)) {
421 page_count--;
422 if (blocksize >= PAGE_CACHE_SIZE) {
423 if (flags & XBF_READ)
424 bp->b_flags |= _XBF_PAGE_LOCKED;
425 } else if (!PagePrivate(page)) {
426 if (test_page_region(page, offset, nbytes))
427 page_count++;
428 }
429 }
430
431 bp->b_pages[i] = page; 371 bp->b_pages[i] = page;
432 offset = 0; 372 offset = 0;
433 } 373 }
374 return 0;
434 375
435 if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { 376out_free_pages:
436 for (i = 0; i < bp->b_page_count; i++) 377 for (i = 0; i < bp->b_page_count; i++)
437 unlock_page(bp->b_pages[i]); 378 __free_page(bp->b_pages[i]);
438 }
439
440 if (page_count == bp->b_page_count)
441 bp->b_flags |= XBF_DONE;
442
443 return error; 379 return error;
444} 380}
445 381
@@ -451,14 +387,23 @@ _xfs_buf_map_pages(
451 xfs_buf_t *bp, 387 xfs_buf_t *bp,
452 uint flags) 388 uint flags)
453{ 389{
454 /* A single page buffer is always mappable */ 390 ASSERT(bp->b_flags & _XBF_PAGES);
455 if (bp->b_page_count == 1) { 391 if (bp->b_page_count == 1) {
392 /* A single page buffer is always mappable */
456 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; 393 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
457 bp->b_flags |= XBF_MAPPED; 394 bp->b_flags |= XBF_MAPPED;
458 } else if (flags & XBF_MAPPED) { 395 } else if (flags & XBF_MAPPED) {
459 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, 396 int retried = 0;
460 -1, PAGE_KERNEL); 397
461 if (unlikely(bp->b_addr == NULL)) 398 do {
399 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
400 -1, PAGE_KERNEL);
401 if (bp->b_addr)
402 break;
403 vm_unmap_aliases();
404 } while (retried++ <= 1);
405
406 if (!bp->b_addr)
462 return -ENOMEM; 407 return -ENOMEM;
463 bp->b_addr += bp->b_offset; 408 bp->b_addr += bp->b_offset;
464 bp->b_flags |= XBF_MAPPED; 409 bp->b_flags |= XBF_MAPPED;
@@ -569,9 +514,14 @@ found:
569 } 514 }
570 } 515 }
571 516
517 /*
518 * if the buffer is stale, clear all the external state associated with
519 * it. We need to keep flags such as how we allocated the buffer memory
520 * intact here.
521 */
572 if (bp->b_flags & XBF_STALE) { 522 if (bp->b_flags & XBF_STALE) {
573 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 523 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
574 bp->b_flags &= XBF_MAPPED; 524 bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
575 } 525 }
576 526
577 trace_xfs_buf_find(bp, flags, _RET_IP_); 527 trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -592,7 +542,7 @@ xfs_buf_get(
592 xfs_buf_flags_t flags) 542 xfs_buf_flags_t flags)
593{ 543{
594 xfs_buf_t *bp, *new_bp; 544 xfs_buf_t *bp, *new_bp;
595 int error = 0, i; 545 int error = 0;
596 546
597 new_bp = xfs_buf_allocate(flags); 547 new_bp = xfs_buf_allocate(flags);
598 if (unlikely(!new_bp)) 548 if (unlikely(!new_bp))
@@ -600,7 +550,7 @@ xfs_buf_get(
600 550
601 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); 551 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
602 if (bp == new_bp) { 552 if (bp == new_bp) {
603 error = _xfs_buf_lookup_pages(bp, flags); 553 error = xfs_buf_allocate_memory(bp, flags);
604 if (error) 554 if (error)
605 goto no_buffer; 555 goto no_buffer;
606 } else { 556 } else {
@@ -609,14 +559,11 @@ xfs_buf_get(
609 return NULL; 559 return NULL;
610 } 560 }
611 561
612 for (i = 0; i < bp->b_page_count; i++)
613 mark_page_accessed(bp->b_pages[i]);
614
615 if (!(bp->b_flags & XBF_MAPPED)) { 562 if (!(bp->b_flags & XBF_MAPPED)) {
616 error = _xfs_buf_map_pages(bp, flags); 563 error = _xfs_buf_map_pages(bp, flags);
617 if (unlikely(error)) { 564 if (unlikely(error)) {
618 printk(KERN_WARNING "%s: failed to map pages\n", 565 xfs_warn(target->bt_mount,
619 __func__); 566 "%s: failed to map pages\n", __func__);
620 goto no_buffer; 567 goto no_buffer;
621 } 568 }
622 } 569 }
@@ -712,8 +659,7 @@ xfs_buf_readahead(
712{ 659{
713 struct backing_dev_info *bdi; 660 struct backing_dev_info *bdi;
714 661
715 bdi = target->bt_mapping->backing_dev_info; 662 if (bdi_read_congested(target->bt_bdi))
716 if (bdi_read_congested(bdi))
717 return; 663 return;
718 664
719 xfs_buf_read(target, ioff, isize, 665 xfs_buf_read(target, ioff, isize,
@@ -791,10 +737,10 @@ xfs_buf_associate_memory(
791 size_t buflen; 737 size_t buflen;
792 int page_count; 738 int page_count;
793 739
794 pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; 740 pageaddr = (unsigned long)mem & PAGE_MASK;
795 offset = (unsigned long)mem - pageaddr; 741 offset = (unsigned long)mem - pageaddr;
796 buflen = PAGE_CACHE_ALIGN(len + offset); 742 buflen = PAGE_ALIGN(len + offset);
797 page_count = buflen >> PAGE_CACHE_SHIFT; 743 page_count = buflen >> PAGE_SHIFT;
798 744
799 /* Free any previous set of page pointers */ 745 /* Free any previous set of page pointers */
800 if (bp->b_pages) 746 if (bp->b_pages)
@@ -811,13 +757,12 @@ xfs_buf_associate_memory(
811 757
812 for (i = 0; i < bp->b_page_count; i++) { 758 for (i = 0; i < bp->b_page_count; i++) {
813 bp->b_pages[i] = mem_to_page((void *)pageaddr); 759 bp->b_pages[i] = mem_to_page((void *)pageaddr);
814 pageaddr += PAGE_CACHE_SIZE; 760 pageaddr += PAGE_SIZE;
815 } 761 }
816 762
817 bp->b_count_desired = len; 763 bp->b_count_desired = len;
818 bp->b_buffer_length = buflen; 764 bp->b_buffer_length = buflen;
819 bp->b_flags |= XBF_MAPPED; 765 bp->b_flags |= XBF_MAPPED;
820 bp->b_flags &= ~_XBF_PAGE_LOCKED;
821 766
822 return 0; 767 return 0;
823} 768}
@@ -850,8 +795,8 @@ xfs_buf_get_uncached(
850 795
851 error = _xfs_buf_map_pages(bp, XBF_MAPPED); 796 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
852 if (unlikely(error)) { 797 if (unlikely(error)) {
853 printk(KERN_WARNING "%s: failed to map pages\n", 798 xfs_warn(target->bt_mount,
854 __func__); 799 "%s: failed to map pages\n", __func__);
855 goto fail_free_mem; 800 goto fail_free_mem;
856 } 801 }
857 802
@@ -924,20 +869,7 @@ xfs_buf_rele(
924 869
925 870
926/* 871/*
927 * Mutual exclusion on buffers. Locking model: 872 * Lock a buffer object, if it is not already locked.
928 *
929 * Buffers associated with inodes for which buffer locking
930 * is not enabled are not protected by semaphores, and are
931 * assumed to be exclusively owned by the caller. There is a
932 * spinlock in the buffer, used by the caller when concurrent
933 * access is possible.
934 */
935
936/*
937 * Locks a buffer object, if it is not already locked. Note that this in
938 * no way locks the underlying pages, so it is only useful for
939 * synchronizing concurrent use of buffer objects, not for synchronizing
940 * independent access to the underlying pages.
941 * 873 *
942 * If we come across a stale, pinned, locked buffer, we know that we are 874 * If we come across a stale, pinned, locked buffer, we know that we are
943 * being asked to lock a buffer that has been reallocated. Because it is 875 * being asked to lock a buffer that has been reallocated. Because it is
@@ -971,10 +903,7 @@ xfs_buf_lock_value(
971} 903}
972 904
973/* 905/*
974 * Locks a buffer object. 906 * Lock a buffer object.
975 * Note that this in no way locks the underlying pages, so it is only
976 * useful for synchronizing concurrent use of buffer objects, not for
977 * synchronizing independent access to the underlying pages.
978 * 907 *
979 * If we come across a stale, pinned, locked buffer, we know that we 908 * If we come across a stale, pinned, locked buffer, we know that we
980 * are being asked to lock a buffer that has been reallocated. Because 909 * are being asked to lock a buffer that has been reallocated. Because
@@ -991,7 +920,7 @@ xfs_buf_lock(
991 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 920 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
992 xfs_log_force(bp->b_target->bt_mount, 0); 921 xfs_log_force(bp->b_target->bt_mount, 0);
993 if (atomic_read(&bp->b_io_remaining)) 922 if (atomic_read(&bp->b_io_remaining))
994 blk_run_address_space(bp->b_target->bt_mapping); 923 blk_flush_plug(current);
995 down(&bp->b_sema); 924 down(&bp->b_sema);
996 XB_SET_OWNER(bp); 925 XB_SET_OWNER(bp);
997 926
@@ -1035,9 +964,7 @@ xfs_buf_wait_unpin(
1035 set_current_state(TASK_UNINTERRUPTIBLE); 964 set_current_state(TASK_UNINTERRUPTIBLE);
1036 if (atomic_read(&bp->b_pin_count) == 0) 965 if (atomic_read(&bp->b_pin_count) == 0)
1037 break; 966 break;
1038 if (atomic_read(&bp->b_io_remaining)) 967 io_schedule();
1039 blk_run_address_space(bp->b_target->bt_mapping);
1040 schedule();
1041 } 968 }
1042 remove_wait_queue(&bp->b_waiters, &wait); 969 remove_wait_queue(&bp->b_waiters, &wait);
1043 set_current_state(TASK_RUNNING); 970 set_current_state(TASK_RUNNING);
@@ -1249,10 +1176,8 @@ _xfs_buf_ioend(
1249 xfs_buf_t *bp, 1176 xfs_buf_t *bp,
1250 int schedule) 1177 int schedule)
1251{ 1178{
1252 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { 1179 if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1253 bp->b_flags &= ~_XBF_PAGE_LOCKED;
1254 xfs_buf_ioend(bp, schedule); 1180 xfs_buf_ioend(bp, schedule);
1255 }
1256} 1181}
1257 1182
1258STATIC void 1183STATIC void
@@ -1261,35 +1186,12 @@ xfs_buf_bio_end_io(
1261 int error) 1186 int error)
1262{ 1187{
1263 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; 1188 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1264 unsigned int blocksize = bp->b_target->bt_bsize;
1265 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1266 1189
1267 xfs_buf_ioerror(bp, -error); 1190 xfs_buf_ioerror(bp, -error);
1268 1191
1269 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1192 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1270 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1193 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1271 1194
1272 do {
1273 struct page *page = bvec->bv_page;
1274
1275 ASSERT(!PagePrivate(page));
1276 if (unlikely(bp->b_error)) {
1277 if (bp->b_flags & XBF_READ)
1278 ClearPageUptodate(page);
1279 } else if (blocksize >= PAGE_CACHE_SIZE) {
1280 SetPageUptodate(page);
1281 } else if (!PagePrivate(page) &&
1282 (bp->b_flags & _XBF_PAGE_CACHE)) {
1283 set_page_region(page, bvec->bv_offset, bvec->bv_len);
1284 }
1285
1286 if (--bvec >= bio->bi_io_vec)
1287 prefetchw(&bvec->bv_page->flags);
1288
1289 if (bp->b_flags & _XBF_PAGE_LOCKED)
1290 unlock_page(page);
1291 } while (bvec >= bio->bi_io_vec);
1292
1293 _xfs_buf_ioend(bp, 1); 1195 _xfs_buf_ioend(bp, 1);
1294 bio_put(bio); 1196 bio_put(bio);
1295} 1197}
@@ -1303,7 +1205,6 @@ _xfs_buf_ioapply(
1303 int offset = bp->b_offset; 1205 int offset = bp->b_offset;
1304 int size = bp->b_count_desired; 1206 int size = bp->b_count_desired;
1305 sector_t sector = bp->b_bn; 1207 sector_t sector = bp->b_bn;
1306 unsigned int blocksize = bp->b_target->bt_bsize;
1307 1208
1308 total_nr_pages = bp->b_page_count; 1209 total_nr_pages = bp->b_page_count;
1309 map_i = 0; 1210 map_i = 0;
@@ -1324,29 +1225,6 @@ _xfs_buf_ioapply(
1324 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1225 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
1325 } 1226 }
1326 1227
1327 /* Special code path for reading a sub page size buffer in --
1328 * we populate up the whole page, and hence the other metadata
1329 * in the same page. This optimization is only valid when the
1330 * filesystem block size is not smaller than the page size.
1331 */
1332 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1333 ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
1334 (XBF_READ|_XBF_PAGE_LOCKED)) &&
1335 (blocksize >= PAGE_CACHE_SIZE)) {
1336 bio = bio_alloc(GFP_NOIO, 1);
1337
1338 bio->bi_bdev = bp->b_target->bt_bdev;
1339 bio->bi_sector = sector - (offset >> BBSHIFT);
1340 bio->bi_end_io = xfs_buf_bio_end_io;
1341 bio->bi_private = bp;
1342
1343 bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
1344 size = 0;
1345
1346 atomic_inc(&bp->b_io_remaining);
1347
1348 goto submit_io;
1349 }
1350 1228
1351next_chunk: 1229next_chunk:
1352 atomic_inc(&bp->b_io_remaining); 1230 atomic_inc(&bp->b_io_remaining);
@@ -1360,8 +1238,9 @@ next_chunk:
1360 bio->bi_end_io = xfs_buf_bio_end_io; 1238 bio->bi_end_io = xfs_buf_bio_end_io;
1361 bio->bi_private = bp; 1239 bio->bi_private = bp;
1362 1240
1241
1363 for (; size && nr_pages; nr_pages--, map_i++) { 1242 for (; size && nr_pages; nr_pages--, map_i++) {
1364 int rbytes, nbytes = PAGE_CACHE_SIZE - offset; 1243 int rbytes, nbytes = PAGE_SIZE - offset;
1365 1244
1366 if (nbytes > size) 1245 if (nbytes > size)
1367 nbytes = size; 1246 nbytes = size;
@@ -1376,7 +1255,6 @@ next_chunk:
1376 total_nr_pages--; 1255 total_nr_pages--;
1377 } 1256 }
1378 1257
1379submit_io:
1380 if (likely(bio->bi_size)) { 1258 if (likely(bio->bi_size)) {
1381 if (xfs_buf_is_vmapped(bp)) { 1259 if (xfs_buf_is_vmapped(bp)) {
1382 flush_kernel_vmap_range(bp->b_addr, 1260 flush_kernel_vmap_range(bp->b_addr,
@@ -1386,18 +1264,7 @@ submit_io:
1386 if (size) 1264 if (size)
1387 goto next_chunk; 1265 goto next_chunk;
1388 } else { 1266 } else {
1389 /*
1390 * if we get here, no pages were added to the bio. However,
1391 * we can't just error out here - if the pages are locked then
1392 * we have to unlock them otherwise we can hang on a later
1393 * access to the page.
1394 */
1395 xfs_buf_ioerror(bp, EIO); 1267 xfs_buf_ioerror(bp, EIO);
1396 if (bp->b_flags & _XBF_PAGE_LOCKED) {
1397 int i;
1398 for (i = 0; i < bp->b_page_count; i++)
1399 unlock_page(bp->b_pages[i]);
1400 }
1401 bio_put(bio); 1268 bio_put(bio);
1402 } 1269 }
1403} 1270}
@@ -1443,7 +1310,7 @@ xfs_buf_iowait(
1443 trace_xfs_buf_iowait(bp, _RET_IP_); 1310 trace_xfs_buf_iowait(bp, _RET_IP_);
1444 1311
1445 if (atomic_read(&bp->b_io_remaining)) 1312 if (atomic_read(&bp->b_io_remaining))
1446 blk_run_address_space(bp->b_target->bt_mapping); 1313 blk_flush_plug(current);
1447 wait_for_completion(&bp->b_iowait); 1314 wait_for_completion(&bp->b_iowait);
1448 1315
1449 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1316 trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1461,8 +1328,8 @@ xfs_buf_offset(
1461 return XFS_BUF_PTR(bp) + offset; 1328 return XFS_BUF_PTR(bp) + offset;
1462 1329
1463 offset += bp->b_offset; 1330 offset += bp->b_offset;
1464 page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; 1331 page = bp->b_pages[offset >> PAGE_SHIFT];
1465 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); 1332 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
1466} 1333}
1467 1334
1468/* 1335/*
@@ -1484,9 +1351,9 @@ xfs_buf_iomove(
1484 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; 1351 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
1485 cpoff = xfs_buf_poff(boff + bp->b_offset); 1352 cpoff = xfs_buf_poff(boff + bp->b_offset);
1486 csize = min_t(size_t, 1353 csize = min_t(size_t,
1487 PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); 1354 PAGE_SIZE-cpoff, bp->b_count_desired-boff);
1488 1355
1489 ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); 1356 ASSERT(((csize + cpoff) <= PAGE_SIZE));
1490 1357
1491 switch (mode) { 1358 switch (mode) {
1492 case XBRW_ZERO: 1359 case XBRW_ZERO:
@@ -1599,7 +1466,6 @@ xfs_free_buftarg(
1599 xfs_flush_buftarg(btp, 1); 1466 xfs_flush_buftarg(btp, 1);
1600 if (mp->m_flags & XFS_MOUNT_BARRIER) 1467 if (mp->m_flags & XFS_MOUNT_BARRIER)
1601 xfs_blkdev_issue_flush(btp); 1468 xfs_blkdev_issue_flush(btp);
1602 iput(btp->bt_mapping->host);
1603 1469
1604 kthread_stop(btp->bt_task); 1470 kthread_stop(btp->bt_task);
1605 kmem_free(btp); 1471 kmem_free(btp);
@@ -1617,21 +1483,12 @@ xfs_setsize_buftarg_flags(
1617 btp->bt_smask = sectorsize - 1; 1483 btp->bt_smask = sectorsize - 1;
1618 1484
1619 if (set_blocksize(btp->bt_bdev, sectorsize)) { 1485 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1620 printk(KERN_WARNING 1486 xfs_warn(btp->bt_mount,
1621 "XFS: Cannot set_blocksize to %u on device %s\n", 1487 "Cannot set_blocksize to %u on device %s\n",
1622 sectorsize, XFS_BUFTARG_NAME(btp)); 1488 sectorsize, XFS_BUFTARG_NAME(btp));
1623 return EINVAL; 1489 return EINVAL;
1624 } 1490 }
1625 1491
1626 if (verbose &&
1627 (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
1628 printk(KERN_WARNING
1629 "XFS: %u byte sectors in use on device %s. "
1630 "This is suboptimal; %u or greater is ideal.\n",
1631 sectorsize, XFS_BUFTARG_NAME(btp),
1632 (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
1633 }
1634
1635 return 0; 1492 return 0;
1636} 1493}
1637 1494
@@ -1646,7 +1503,7 @@ xfs_setsize_buftarg_early(
1646 struct block_device *bdev) 1503 struct block_device *bdev)
1647{ 1504{
1648 return xfs_setsize_buftarg_flags(btp, 1505 return xfs_setsize_buftarg_flags(btp,
1649 PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); 1506 PAGE_SIZE, bdev_logical_block_size(bdev), 0);
1650} 1507}
1651 1508
1652int 1509int
@@ -1659,41 +1516,6 @@ xfs_setsize_buftarg(
1659} 1516}
1660 1517
1661STATIC int 1518STATIC int
1662xfs_mapping_buftarg(
1663 xfs_buftarg_t *btp,
1664 struct block_device *bdev)
1665{
1666 struct backing_dev_info *bdi;
1667 struct inode *inode;
1668 struct address_space *mapping;
1669 static const struct address_space_operations mapping_aops = {
1670 .sync_page = block_sync_page,
1671 .migratepage = fail_migrate_page,
1672 };
1673
1674 inode = new_inode(bdev->bd_inode->i_sb);
1675 if (!inode) {
1676 printk(KERN_WARNING
1677 "XFS: Cannot allocate mapping inode for device %s\n",
1678 XFS_BUFTARG_NAME(btp));
1679 return ENOMEM;
1680 }
1681 inode->i_ino = get_next_ino();
1682 inode->i_mode = S_IFBLK;
1683 inode->i_bdev = bdev;
1684 inode->i_rdev = bdev->bd_dev;
1685 bdi = blk_get_backing_dev_info(bdev);
1686 if (!bdi)
1687 bdi = &default_backing_dev_info;
1688 mapping = &inode->i_data;
1689 mapping->a_ops = &mapping_aops;
1690 mapping->backing_dev_info = bdi;
1691 mapping_set_gfp_mask(mapping, GFP_NOFS);
1692 btp->bt_mapping = mapping;
1693 return 0;
1694}
1695
1696STATIC int
1697xfs_alloc_delwrite_queue( 1519xfs_alloc_delwrite_queue(
1698 xfs_buftarg_t *btp, 1520 xfs_buftarg_t *btp,
1699 const char *fsname) 1521 const char *fsname)
@@ -1721,12 +1543,14 @@ xfs_alloc_buftarg(
1721 btp->bt_mount = mp; 1543 btp->bt_mount = mp;
1722 btp->bt_dev = bdev->bd_dev; 1544 btp->bt_dev = bdev->bd_dev;
1723 btp->bt_bdev = bdev; 1545 btp->bt_bdev = bdev;
1546 btp->bt_bdi = blk_get_backing_dev_info(bdev);
1547 if (!btp->bt_bdi)
1548 goto error;
1549
1724 INIT_LIST_HEAD(&btp->bt_lru); 1550 INIT_LIST_HEAD(&btp->bt_lru);
1725 spin_lock_init(&btp->bt_lru_lock); 1551 spin_lock_init(&btp->bt_lru_lock);
1726 if (xfs_setsize_buftarg_early(btp, bdev)) 1552 if (xfs_setsize_buftarg_early(btp, bdev))
1727 goto error; 1553 goto error;
1728 if (xfs_mapping_buftarg(btp, bdev))
1729 goto error;
1730 if (xfs_alloc_delwrite_queue(btp, fsname)) 1554 if (xfs_alloc_delwrite_queue(btp, fsname))
1731 goto error; 1555 goto error;
1732 btp->bt_shrinker.shrink = xfs_buftarg_shrink; 1556 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
@@ -1948,7 +1772,7 @@ xfsbufd(
1948 count++; 1772 count++;
1949 } 1773 }
1950 if (count) 1774 if (count)
1951 blk_run_address_space(target->bt_mapping); 1775 blk_flush_plug(current);
1952 1776
1953 } while (!kthread_should_stop()); 1777 } while (!kthread_should_stop());
1954 1778
@@ -1996,7 +1820,7 @@ xfs_flush_buftarg(
1996 1820
1997 if (wait) { 1821 if (wait) {
1998 /* Expedite and wait for IO to complete. */ 1822 /* Expedite and wait for IO to complete. */
1999 blk_run_address_space(target->bt_mapping); 1823 blk_flush_plug(current);
2000 while (!list_empty(&wait_list)) { 1824 while (!list_empty(&wait_list)) {
2001 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1825 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2002 1826
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index cbe65950e524..a9a1c4512645 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -61,30 +61,11 @@ typedef enum {
61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ 61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
62 62
63/* flags used only internally */ 63/* flags used only internally */
64#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
65#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ 64#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
66#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ 65#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
66#define _XBF_KMEM (1 << 20)/* backed by heap memory */
67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ 67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
68 68
69/*
70 * Special flag for supporting metadata blocks smaller than a FSB.
71 *
72 * In this case we can have multiple xfs_buf_t on a single page and
73 * need to lock out concurrent xfs_buf_t readers as they only
74 * serialise access to the buffer.
75 *
76 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
77 * between reads of the page. Hence we can have one thread read the
78 * page and modify it, but then race with another thread that thinks
79 * the page is not up-to-date and hence reads it again.
80 *
81 * The result is that the first modifcation to the page is lost.
82 * This sort of AGF/AGI reading race can happen when unlinking inodes
83 * that require truncation and results in the AGI unlinked list
84 * modifications being lost.
85 */
86#define _XBF_PAGE_LOCKED (1 << 22)
87
88typedef unsigned int xfs_buf_flags_t; 69typedef unsigned int xfs_buf_flags_t;
89 70
90#define XFS_BUF_FLAGS \ 71#define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
100 { XBF_LOCK, "LOCK" }, /* should never be set */\ 81 { XBF_LOCK, "LOCK" }, /* should never be set */\
101 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ 82 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
102 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ 83 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
103 { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
104 { _XBF_PAGES, "PAGES" }, \ 84 { _XBF_PAGES, "PAGES" }, \
105 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ 85 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
106 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 86 { _XBF_KMEM, "KMEM" }, \
107 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } 87 { _XBF_DELWRI_Q, "DELWRI_Q" }
108
109 88
110typedef enum { 89typedef enum {
111 XBT_FORCE_SLEEP = 0, 90 XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
120typedef struct xfs_buftarg { 99typedef struct xfs_buftarg {
121 dev_t bt_dev; 100 dev_t bt_dev;
122 struct block_device *bt_bdev; 101 struct block_device *bt_bdev;
123 struct address_space *bt_mapping; 102 struct backing_dev_info *bt_bdi;
124 struct xfs_mount *bt_mount; 103 struct xfs_mount *bt_mount;
125 unsigned int bt_bsize; 104 unsigned int bt_bsize;
126 unsigned int bt_sshift; 105 unsigned int bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
139 unsigned int bt_lru_nr; 118 unsigned int bt_lru_nr;
140} xfs_buftarg_t; 119} xfs_buftarg_t;
141 120
142/*
143 * xfs_buf_t: Buffer structure for pagecache-based buffers
144 *
145 * This buffer structure is used by the pagecache buffer management routines
146 * to refer to an assembly of pages forming a logical buffer.
147 *
148 * The buffer structure is used on a temporary basis only, and discarded when
149 * released. The real data storage is recorded in the pagecache. Buffers are
150 * hashed to the block device on which the file system resides.
151 */
152
153struct xfs_buf; 121struct xfs_buf;
154typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); 122typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
155 123
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index a55c1b46b219..52aadfbed132 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -896,6 +896,7 @@ xfs_file_fallocate(
896 xfs_flock64_t bf; 896 xfs_flock64_t bf;
897 xfs_inode_t *ip = XFS_I(inode); 897 xfs_inode_t *ip = XFS_I(inode);
898 int cmd = XFS_IOC_RESVSP; 898 int cmd = XFS_IOC_RESVSP;
899 int attr_flags = XFS_ATTR_NOLOCK;
899 900
900 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 901 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
901 return -EOPNOTSUPP; 902 return -EOPNOTSUPP;
@@ -918,7 +919,10 @@ xfs_file_fallocate(
918 goto out_unlock; 919 goto out_unlock;
919 } 920 }
920 921
921 error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); 922 if (file->f_flags & O_DSYNC)
923 attr_flags |= XFS_ATTR_SYNC;
924
925 error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
922 if (error) 926 if (error)
923 goto out_unlock; 927 goto out_unlock;
924 928
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 0ca0e3c024d7..acca2c5ca3fa 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -624,6 +624,10 @@ xfs_ioc_space(
624 624
625 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 625 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
626 attr_flags |= XFS_ATTR_NONBLOCK; 626 attr_flags |= XFS_ATTR_NONBLOCK;
627
628 if (filp->f_flags & O_DSYNC)
629 attr_flags |= XFS_ATTR_SYNC;
630
627 if (ioflags & IO_INVIS) 631 if (ioflags & IO_INVIS)
628 attr_flags |= XFS_ATTR_DMI; 632 attr_flags |= XFS_ATTR_DMI;
629 633
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 096494997747..244be9cbfe78 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -39,7 +39,6 @@
39#include <mrlock.h> 39#include <mrlock.h>
40#include <time.h> 40#include <time.h>
41 41
42#include <support/debug.h>
43#include <support/uuid.h> 42#include <support/uuid.h>
44 43
45#include <linux/semaphore.h> 44#include <linux/semaphore.h>
@@ -86,6 +85,7 @@
86#include <xfs_aops.h> 85#include <xfs_aops.h>
87#include <xfs_super.h> 86#include <xfs_super.h>
88#include <xfs_buf.h> 87#include <xfs_buf.h>
88#include <xfs_message.h>
89 89
90/* 90/*
91 * Feature macros (disable/enable) 91 * Feature macros (disable/enable)
@@ -280,4 +280,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
280#define __arch_pack 280#define __arch_pack
281#endif 281#endif
282 282
283#define ASSERT_ALWAYS(expr) \
284 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
285
286#ifndef DEBUG
287#define ASSERT(expr) ((void)0)
288
289#ifndef STATIC
290# define STATIC static noinline
291#endif
292
293#else /* DEBUG */
294
295#define ASSERT(expr) \
296 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
297
298#ifndef STATIC
299# define STATIC noinline
300#endif
301
302#endif /* DEBUG */
303
283#endif /* __XFS_LINUX__ */ 304#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
new file mode 100644
index 000000000000..508e06fd7d1e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -0,0 +1,133 @@
1/*
2 * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_mount.h"
27
28/*
29 * XFS logging functions
30 */
31static int
32__xfs_printk(
33 const char *level,
34 const struct xfs_mount *mp,
35 struct va_format *vaf)
36{
37 if (mp && mp->m_fsname)
38 return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
39 return printk("%sXFS: %pV\n", level, vaf);
40}
41
42int xfs_printk(
43 const char *level,
44 const struct xfs_mount *mp,
45 const char *fmt, ...)
46{
47 struct va_format vaf;
48 va_list args;
49 int r;
50
51 va_start(args, fmt);
52
53 vaf.fmt = fmt;
54 vaf.va = &args;
55
56 r = __xfs_printk(level, mp, &vaf);
57 va_end(args);
58
59 return r;
60}
61
62#define define_xfs_printk_level(func, kern_level) \
63int func(const struct xfs_mount *mp, const char *fmt, ...) \
64{ \
65 struct va_format vaf; \
66 va_list args; \
67 int r; \
68 \
69 va_start(args, fmt); \
70 \
71 vaf.fmt = fmt; \
72 vaf.va = &args; \
73 \
74 r = __xfs_printk(kern_level, mp, &vaf); \
75 va_end(args); \
76 \
77 return r; \
78} \
79
80define_xfs_printk_level(xfs_emerg, KERN_EMERG);
81define_xfs_printk_level(xfs_alert, KERN_ALERT);
82define_xfs_printk_level(xfs_crit, KERN_CRIT);
83define_xfs_printk_level(xfs_err, KERN_ERR);
84define_xfs_printk_level(xfs_warn, KERN_WARNING);
85define_xfs_printk_level(xfs_notice, KERN_NOTICE);
86define_xfs_printk_level(xfs_info, KERN_INFO);
87#ifdef DEBUG
88define_xfs_printk_level(xfs_debug, KERN_DEBUG);
89#endif
90
91int
92xfs_alert_tag(
93 const struct xfs_mount *mp,
94 int panic_tag,
95 const char *fmt, ...)
96{
97 struct va_format vaf;
98 va_list args;
99 int do_panic = 0;
100 int r;
101
102 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
103 xfs_printk(KERN_ALERT, mp,
104 "XFS: Transforming an alert into a BUG.");
105 do_panic = 1;
106 }
107
108 va_start(args, fmt);
109
110 vaf.fmt = fmt;
111 vaf.va = &args;
112
113 r = __xfs_printk(KERN_ALERT, mp, &vaf);
114 va_end(args);
115
116 BUG_ON(do_panic);
117
118 return r;
119}
120
121void
122assfail(char *expr, char *file, int line)
123{
124 xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
125 expr, file, line);
126 BUG();
127}
128
129void
130xfs_hex_dump(void *p, int length)
131{
132 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
133}
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
new file mode 100644
index 000000000000..e77ffa16745b
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -0,0 +1,38 @@
1#ifndef __XFS_MESSAGE_H
2#define __XFS_MESSAGE_H 1
3
4struct xfs_mount;
5
6extern int xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4)));
9extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3)));
11extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
12 __attribute__ ((format (printf, 2, 3)));
13extern int xfs_alert_tag(const struct xfs_mount *mp, int tag,
14 const char *fmt, ...)
15 __attribute__ ((format (printf, 3, 4)));
16extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
17 __attribute__ ((format (printf, 2, 3)));
18extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
19 __attribute__ ((format (printf, 2, 3)));
20extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
21 __attribute__ ((format (printf, 2, 3)));
22extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
23 __attribute__ ((format (printf, 2, 3)));
24extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
25 __attribute__ ((format (printf, 2, 3)));
26
27#ifdef DEBUG
28extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3)));
30#else
31#define xfs_debug(mp, fmt, ...) (0)
32#endif
33
34extern void assfail(char *expr, char *f, int l);
35
36extern void xfs_hex_dump(void *p, int length);
37
38#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9731898083ae..1ba5c451da36 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -173,6 +173,15 @@ xfs_parseargs(
173 __uint8_t iosizelog = 0; 173 __uint8_t iosizelog = 0;
174 174
175 /* 175 /*
176 * set up the mount name first so all the errors will refer to the
177 * correct device.
178 */
179 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
180 if (!mp->m_fsname)
181 return ENOMEM;
182 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
183
184 /*
176 * Copy binary VFS mount flags we are interested in. 185 * Copy binary VFS mount flags we are interested in.
177 */ 186 */
178 if (sb->s_flags & MS_RDONLY) 187 if (sb->s_flags & MS_RDONLY)
@@ -189,6 +198,7 @@ xfs_parseargs(
189 mp->m_flags |= XFS_MOUNT_BARRIER; 198 mp->m_flags |= XFS_MOUNT_BARRIER;
190 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 199 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
191 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 200 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
201 mp->m_flags |= XFS_MOUNT_DELAYLOG;
192 202
193 /* 203 /*
194 * These can be overridden by the mount option parsing. 204 * These can be overridden by the mount option parsing.
@@ -207,24 +217,21 @@ xfs_parseargs(
207 217
208 if (!strcmp(this_char, MNTOPT_LOGBUFS)) { 218 if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
209 if (!value || !*value) { 219 if (!value || !*value) {
210 cmn_err(CE_WARN, 220 xfs_warn(mp, "%s option requires an argument",
211 "XFS: %s option requires an argument",
212 this_char); 221 this_char);
213 return EINVAL; 222 return EINVAL;
214 } 223 }
215 mp->m_logbufs = simple_strtoul(value, &eov, 10); 224 mp->m_logbufs = simple_strtoul(value, &eov, 10);
216 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 225 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
217 if (!value || !*value) { 226 if (!value || !*value) {
218 cmn_err(CE_WARN, 227 xfs_warn(mp, "%s option requires an argument",
219 "XFS: %s option requires an argument",
220 this_char); 228 this_char);
221 return EINVAL; 229 return EINVAL;
222 } 230 }
223 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 231 mp->m_logbsize = suffix_strtoul(value, &eov, 10);
224 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 232 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
225 if (!value || !*value) { 233 if (!value || !*value) {
226 cmn_err(CE_WARN, 234 xfs_warn(mp, "%s option requires an argument",
227 "XFS: %s option requires an argument",
228 this_char); 235 this_char);
229 return EINVAL; 236 return EINVAL;
230 } 237 }
@@ -232,14 +239,12 @@ xfs_parseargs(
232 if (!mp->m_logname) 239 if (!mp->m_logname)
233 return ENOMEM; 240 return ENOMEM;
234 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 241 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
235 cmn_err(CE_WARN, 242 xfs_warn(mp, "%s option not allowed on this system",
236 "XFS: %s option not allowed on this system",
237 this_char); 243 this_char);
238 return EINVAL; 244 return EINVAL;
239 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 245 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
240 if (!value || !*value) { 246 if (!value || !*value) {
241 cmn_err(CE_WARN, 247 xfs_warn(mp, "%s option requires an argument",
242 "XFS: %s option requires an argument",
243 this_char); 248 this_char);
244 return EINVAL; 249 return EINVAL;
245 } 250 }
@@ -248,8 +253,7 @@ xfs_parseargs(
248 return ENOMEM; 253 return ENOMEM;
249 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { 254 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
250 if (!value || !*value) { 255 if (!value || !*value) {
251 cmn_err(CE_WARN, 256 xfs_warn(mp, "%s option requires an argument",
252 "XFS: %s option requires an argument",
253 this_char); 257 this_char);
254 return EINVAL; 258 return EINVAL;
255 } 259 }
@@ -257,8 +261,7 @@ xfs_parseargs(
257 iosizelog = ffs(iosize) - 1; 261 iosizelog = ffs(iosize) - 1;
258 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 262 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
259 if (!value || !*value) { 263 if (!value || !*value) {
260 cmn_err(CE_WARN, 264 xfs_warn(mp, "%s option requires an argument",
261 "XFS: %s option requires an argument",
262 this_char); 265 this_char);
263 return EINVAL; 266 return EINVAL;
264 } 267 }
@@ -280,16 +283,14 @@ xfs_parseargs(
280 mp->m_flags |= XFS_MOUNT_SWALLOC; 283 mp->m_flags |= XFS_MOUNT_SWALLOC;
281 } else if (!strcmp(this_char, MNTOPT_SUNIT)) { 284 } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
282 if (!value || !*value) { 285 if (!value || !*value) {
283 cmn_err(CE_WARN, 286 xfs_warn(mp, "%s option requires an argument",
284 "XFS: %s option requires an argument",
285 this_char); 287 this_char);
286 return EINVAL; 288 return EINVAL;
287 } 289 }
288 dsunit = simple_strtoul(value, &eov, 10); 290 dsunit = simple_strtoul(value, &eov, 10);
289 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 291 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
290 if (!value || !*value) { 292 if (!value || !*value) {
291 cmn_err(CE_WARN, 293 xfs_warn(mp, "%s option requires an argument",
292 "XFS: %s option requires an argument",
293 this_char); 294 this_char);
294 return EINVAL; 295 return EINVAL;
295 } 296 }
@@ -297,8 +298,7 @@ xfs_parseargs(
297 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 298 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
298 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; 299 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
299#if !XFS_BIG_INUMS 300#if !XFS_BIG_INUMS
300 cmn_err(CE_WARN, 301 xfs_warn(mp, "%s option not allowed on this system",
301 "XFS: %s option not allowed on this system",
302 this_char); 302 this_char);
303 return EINVAL; 303 return EINVAL;
304#endif 304#endif
@@ -356,20 +356,19 @@ xfs_parseargs(
356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG; 357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
358 } else if (!strcmp(this_char, "ihashsize")) { 358 } else if (!strcmp(this_char, "ihashsize")) {
359 cmn_err(CE_WARN, 359 xfs_warn(mp,
360 "XFS: ihashsize no longer used, option is deprecated."); 360 "ihashsize no longer used, option is deprecated.");
361 } else if (!strcmp(this_char, "osyncisdsync")) { 361 } else if (!strcmp(this_char, "osyncisdsync")) {
362 cmn_err(CE_WARN, 362 xfs_warn(mp,
363 "XFS: osyncisdsync has no effect, option is deprecated."); 363 "osyncisdsync has no effect, option is deprecated.");
364 } else if (!strcmp(this_char, "osyncisosync")) { 364 } else if (!strcmp(this_char, "osyncisosync")) {
365 cmn_err(CE_WARN, 365 xfs_warn(mp,
366 "XFS: osyncisosync has no effect, option is deprecated."); 366 "osyncisosync has no effect, option is deprecated.");
367 } else if (!strcmp(this_char, "irixsgid")) { 367 } else if (!strcmp(this_char, "irixsgid")) {
368 cmn_err(CE_WARN, 368 xfs_warn(mp,
369 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); 369 "irixsgid is now a sysctl(2) variable, option is deprecated.");
370 } else { 370 } else {
371 cmn_err(CE_WARN, 371 xfs_warn(mp, "unknown mount option [%s].", this_char);
372 "XFS: unknown mount option [%s].", this_char);
373 return EINVAL; 372 return EINVAL;
374 } 373 }
375 } 374 }
@@ -379,40 +378,37 @@ xfs_parseargs(
379 */ 378 */
380 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && 379 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
381 !(mp->m_flags & XFS_MOUNT_RDONLY)) { 380 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
382 cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); 381 xfs_warn(mp, "no-recovery mounts must be read-only.");
383 return EINVAL; 382 return EINVAL;
384 } 383 }
385 384
386 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { 385 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
387 cmn_err(CE_WARN, 386 xfs_warn(mp,
388 "XFS: sunit and swidth options incompatible with the noalign option"); 387 "sunit and swidth options incompatible with the noalign option");
389 return EINVAL; 388 return EINVAL;
390 } 389 }
391 390
392#ifndef CONFIG_XFS_QUOTA 391#ifndef CONFIG_XFS_QUOTA
393 if (XFS_IS_QUOTA_RUNNING(mp)) { 392 if (XFS_IS_QUOTA_RUNNING(mp)) {
394 cmn_err(CE_WARN, 393 xfs_warn(mp, "quota support not available in this kernel.");
395 "XFS: quota support not available in this kernel.");
396 return EINVAL; 394 return EINVAL;
397 } 395 }
398#endif 396#endif
399 397
400 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && 398 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
401 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { 399 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
402 cmn_err(CE_WARN, 400 xfs_warn(mp, "cannot mount with both project and group quota");
403 "XFS: cannot mount with both project and group quota");
404 return EINVAL; 401 return EINVAL;
405 } 402 }
406 403
407 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 404 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
408 cmn_err(CE_WARN, 405 xfs_warn(mp, "sunit and swidth must be specified together");
409 "XFS: sunit and swidth must be specified together");
410 return EINVAL; 406 return EINVAL;
411 } 407 }
412 408
413 if (dsunit && (dswidth % dsunit != 0)) { 409 if (dsunit && (dswidth % dsunit != 0)) {
414 cmn_err(CE_WARN, 410 xfs_warn(mp,
415 "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", 411 "stripe width (%d) must be a multiple of the stripe unit (%d)",
416 dswidth, dsunit); 412 dswidth, dsunit);
417 return EINVAL; 413 return EINVAL;
418 } 414 }
@@ -438,8 +434,7 @@ done:
438 mp->m_logbufs != 0 && 434 mp->m_logbufs != 0 &&
439 (mp->m_logbufs < XLOG_MIN_ICLOGS || 435 (mp->m_logbufs < XLOG_MIN_ICLOGS ||
440 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 436 mp->m_logbufs > XLOG_MAX_ICLOGS)) {
441 cmn_err(CE_WARN, 437 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
442 "XFS: invalid logbufs value: %d [not %d-%d]",
443 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 438 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
444 return XFS_ERROR(EINVAL); 439 return XFS_ERROR(EINVAL);
445 } 440 }
@@ -448,22 +443,16 @@ done:
448 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 443 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
449 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 444 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
450 !is_power_of_2(mp->m_logbsize))) { 445 !is_power_of_2(mp->m_logbsize))) {
451 cmn_err(CE_WARN, 446 xfs_warn(mp,
452 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 447 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
453 mp->m_logbsize); 448 mp->m_logbsize);
454 return XFS_ERROR(EINVAL); 449 return XFS_ERROR(EINVAL);
455 } 450 }
456 451
457 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
458 if (!mp->m_fsname)
459 return ENOMEM;
460 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
461
462 if (iosizelog) { 452 if (iosizelog) {
463 if (iosizelog > XFS_MAX_IO_LOG || 453 if (iosizelog > XFS_MAX_IO_LOG ||
464 iosizelog < XFS_MIN_IO_LOG) { 454 iosizelog < XFS_MIN_IO_LOG) {
465 cmn_err(CE_WARN, 455 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
466 "XFS: invalid log iosize: %d [not %d-%d]",
467 iosizelog, XFS_MIN_IO_LOG, 456 iosizelog, XFS_MIN_IO_LOG,
468 XFS_MAX_IO_LOG); 457 XFS_MAX_IO_LOG);
469 return XFS_ERROR(EINVAL); 458 return XFS_ERROR(EINVAL);
@@ -610,7 +599,7 @@ xfs_blkdev_get(
610 mp); 599 mp);
611 if (IS_ERR(*bdevp)) { 600 if (IS_ERR(*bdevp)) {
612 error = PTR_ERR(*bdevp); 601 error = PTR_ERR(*bdevp);
613 printk("XFS: Invalid device [%s], error=%d\n", name, error); 602 xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
614 } 603 }
615 604
616 return -error; 605 return -error;
@@ -664,23 +653,23 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
664 int error; 653 int error;
665 654
666 if (mp->m_logdev_targp != mp->m_ddev_targp) { 655 if (mp->m_logdev_targp != mp->m_ddev_targp) {
667 xfs_fs_cmn_err(CE_NOTE, mp, 656 xfs_notice(mp,
668 "Disabling barriers, not supported with external log device"); 657 "Disabling barriers, not supported with external log device");
669 mp->m_flags &= ~XFS_MOUNT_BARRIER; 658 mp->m_flags &= ~XFS_MOUNT_BARRIER;
670 return; 659 return;
671 } 660 }
672 661
673 if (xfs_readonly_buftarg(mp->m_ddev_targp)) { 662 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
674 xfs_fs_cmn_err(CE_NOTE, mp, 663 xfs_notice(mp,
675 "Disabling barriers, underlying device is readonly"); 664 "Disabling barriers, underlying device is readonly");
676 mp->m_flags &= ~XFS_MOUNT_BARRIER; 665 mp->m_flags &= ~XFS_MOUNT_BARRIER;
677 return; 666 return;
678 } 667 }
679 668
680 error = xfs_barrier_test(mp); 669 error = xfs_barrier_test(mp);
681 if (error) { 670 if (error) {
682 xfs_fs_cmn_err(CE_NOTE, mp, 671 xfs_notice(mp,
683 "Disabling barriers, trial barrier write failed"); 672 "Disabling barriers, trial barrier write failed");
684 mp->m_flags &= ~XFS_MOUNT_BARRIER; 673 mp->m_flags &= ~XFS_MOUNT_BARRIER;
685 return; 674 return;
686 } 675 }
@@ -743,8 +732,8 @@ xfs_open_devices(
743 goto out_close_logdev; 732 goto out_close_logdev;
744 733
745 if (rtdev == ddev || rtdev == logdev) { 734 if (rtdev == ddev || rtdev == logdev) {
746 cmn_err(CE_WARN, 735 xfs_warn(mp,
747 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); 736 "Cannot mount filesystem with identical rtdev and ddev/logdev.");
748 error = EINVAL; 737 error = EINVAL;
749 goto out_close_rtdev; 738 goto out_close_rtdev;
750 } 739 }
@@ -1089,7 +1078,7 @@ xfs_fs_write_inode(
1089 error = 0; 1078 error = 0;
1090 goto out_unlock; 1079 goto out_unlock;
1091 } 1080 }
1092 error = xfs_iflush(ip, 0); 1081 error = xfs_iflush(ip, SYNC_TRYLOCK);
1093 } 1082 }
1094 1083
1095 out_unlock: 1084 out_unlock:
@@ -1345,8 +1334,8 @@ xfs_fs_remount(
1345 * options that we can't actually change. 1334 * options that we can't actually change.
1346 */ 1335 */
1347#if 0 1336#if 0
1348 printk(KERN_INFO 1337 xfs_info(mp,
1349 "XFS: mount option \"%s\" not supported for remount\n", p); 1338 "mount option \"%s\" not supported for remount\n", p);
1350 return -EINVAL; 1339 return -EINVAL;
1351#else 1340#else
1352 break; 1341 break;
@@ -1367,8 +1356,7 @@ xfs_fs_remount(
1367 if (mp->m_update_flags) { 1356 if (mp->m_update_flags) {
1368 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1357 error = xfs_mount_log_sb(mp, mp->m_update_flags);
1369 if (error) { 1358 if (error) {
1370 cmn_err(CE_WARN, 1359 xfs_warn(mp, "failed to write sb changes");
1371 "XFS: failed to write sb changes");
1372 return error; 1360 return error;
1373 } 1361 }
1374 mp->m_update_flags = 0; 1362 mp->m_update_flags = 0;
@@ -1452,15 +1440,15 @@ xfs_finish_flags(
1452 mp->m_logbsize = mp->m_sb.sb_logsunit; 1440 mp->m_logbsize = mp->m_sb.sb_logsunit;
1453 } else if (mp->m_logbsize > 0 && 1441 } else if (mp->m_logbsize > 0 &&
1454 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1442 mp->m_logbsize < mp->m_sb.sb_logsunit) {
1455 cmn_err(CE_WARN, 1443 xfs_warn(mp,
1456 "XFS: logbuf size must be greater than or equal to log stripe size"); 1444 "logbuf size must be greater than or equal to log stripe size");
1457 return XFS_ERROR(EINVAL); 1445 return XFS_ERROR(EINVAL);
1458 } 1446 }
1459 } else { 1447 } else {
1460 /* Fail a mount if the logbuf is larger than 32K */ 1448 /* Fail a mount if the logbuf is larger than 32K */
1461 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1449 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1462 cmn_err(CE_WARN, 1450 xfs_warn(mp,
1463 "XFS: logbuf size for version 1 logs must be 16K or 32K"); 1451 "logbuf size for version 1 logs must be 16K or 32K");
1464 return XFS_ERROR(EINVAL); 1452 return XFS_ERROR(EINVAL);
1465 } 1453 }
1466 } 1454 }
@@ -1477,8 +1465,8 @@ xfs_finish_flags(
1477 * prohibit r/w mounts of read-only filesystems 1465 * prohibit r/w mounts of read-only filesystems
1478 */ 1466 */
1479 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { 1467 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1480 cmn_err(CE_WARN, 1468 xfs_warn(mp,
1481 "XFS: cannot mount a read-only filesystem as read-write"); 1469 "cannot mount a read-only filesystem as read-write");
1482 return XFS_ERROR(EROFS); 1470 return XFS_ERROR(EROFS);
1483 } 1471 }
1484 1472
@@ -1551,10 +1539,14 @@ xfs_fs_fill_super(
1551 if (error) 1539 if (error)
1552 goto out_free_sb; 1540 goto out_free_sb;
1553 1541
1554 error = xfs_mountfs(mp); 1542 /*
1555 if (error) 1543 * we must configure the block size in the superblock before we run the
1556 goto out_filestream_unmount; 1544 * full mount process as the mount process can lookup and cache inodes.
1557 1545 * For the same reason we must also initialise the syncd and register
1546 * the inode cache shrinker so that inodes can be reclaimed during
1547 * operations like a quotacheck that iterate all inodes in the
1548 * filesystem.
1549 */
1558 sb->s_magic = XFS_SB_MAGIC; 1550 sb->s_magic = XFS_SB_MAGIC;
1559 sb->s_blocksize = mp->m_sb.sb_blocksize; 1551 sb->s_blocksize = mp->m_sb.sb_blocksize;
1560 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1552 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1562,6 +1554,16 @@ xfs_fs_fill_super(
1562 sb->s_time_gran = 1; 1554 sb->s_time_gran = 1;
1563 set_posix_acl_flag(sb); 1555 set_posix_acl_flag(sb);
1564 1556
1557 error = xfs_syncd_init(mp);
1558 if (error)
1559 goto out_filestream_unmount;
1560
1561 xfs_inode_shrinker_register(mp);
1562
1563 error = xfs_mountfs(mp);
1564 if (error)
1565 goto out_syncd_stop;
1566
1565 root = igrab(VFS_I(mp->m_rootip)); 1567 root = igrab(VFS_I(mp->m_rootip));
1566 if (!root) { 1568 if (!root) {
1567 error = ENOENT; 1569 error = ENOENT;
@@ -1577,14 +1579,11 @@ xfs_fs_fill_super(
1577 goto fail_vnrele; 1579 goto fail_vnrele;
1578 } 1580 }
1579 1581
1580 error = xfs_syncd_init(mp);
1581 if (error)
1582 goto fail_vnrele;
1583
1584 xfs_inode_shrinker_register(mp);
1585
1586 return 0; 1582 return 0;
1587 1583
1584 out_syncd_stop:
1585 xfs_inode_shrinker_unregister(mp);
1586 xfs_syncd_stop(mp);
1588 out_filestream_unmount: 1587 out_filestream_unmount:
1589 xfs_filestream_unmount(mp); 1588 xfs_filestream_unmount(mp);
1590 out_free_sb: 1589 out_free_sb:
@@ -1608,6 +1607,9 @@ xfs_fs_fill_super(
1608 } 1607 }
1609 1608
1610 fail_unmount: 1609 fail_unmount:
1610 xfs_inode_shrinker_unregister(mp);
1611 xfs_syncd_stop(mp);
1612
1611 /* 1613 /*
1612 * Blow away any referenced inode in the filestreams cache. 1614 * Blow away any referenced inode in the filestreams cache.
1613 * This can and will cause log traffic as inodes go inactive 1615 * This can and will cause log traffic as inodes go inactive
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e22f0057d21f..594cd822d84d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -425,8 +425,7 @@ xfs_quiesce_attr(
425 /* Push the superblock and write an unmount record */ 425 /* Push the superblock and write an unmount record */
426 error = xfs_log_sbcount(mp, 1); 426 error = xfs_log_sbcount(mp, 1);
427 if (error) 427 if (error)
428 xfs_fs_cmn_err(CE_WARN, mp, 428 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
429 "xfs_attr_quiesce: failed to log sb changes. "
430 "Frozen image may not be consistent."); 429 "Frozen image may not be consistent.");
431 xfs_log_unmount_write(mp); 430 xfs_log_unmount_write(mp);
432 xfs_unmountfs_writesb(mp); 431 xfs_unmountfs_writesb(mp);
@@ -762,8 +761,10 @@ xfs_reclaim_inode(
762 struct xfs_perag *pag, 761 struct xfs_perag *pag,
763 int sync_mode) 762 int sync_mode)
764{ 763{
765 int error = 0; 764 int error;
766 765
766restart:
767 error = 0;
767 xfs_ilock(ip, XFS_ILOCK_EXCL); 768 xfs_ilock(ip, XFS_ILOCK_EXCL);
768 if (!xfs_iflock_nowait(ip)) { 769 if (!xfs_iflock_nowait(ip)) {
769 if (!(sync_mode & SYNC_WAIT)) 770 if (!(sync_mode & SYNC_WAIT))
@@ -789,9 +790,31 @@ xfs_reclaim_inode(
789 if (xfs_inode_clean(ip)) 790 if (xfs_inode_clean(ip))
790 goto reclaim; 791 goto reclaim;
791 792
792 /* Now we have an inode that needs flushing */ 793 /*
793 error = xfs_iflush(ip, sync_mode); 794 * Now we have an inode that needs flushing.
795 *
796 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
797 * reclaim as we can deadlock with inode cluster removal.
798 * xfs_ifree_cluster() can lock the inode buffer before it locks the
799 * ip->i_lock, and we are doing the exact opposite here. As a result,
800 * doing a blocking xfs_itobp() to get the cluster buffer will result
801 * in an ABBA deadlock with xfs_ifree_cluster().
802 *
803 * As xfs_ifree_cluser() must gather all inodes that are active in the
804 * cache to mark them stale, if we hit this case we don't actually want
805 * to do IO here - we want the inode marked stale so we can simply
806 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
807 * just unlock the inode, back off and try again. Hopefully the next
808 * pass through will see the stale flag set on the inode.
809 */
810 error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
794 if (sync_mode & SYNC_WAIT) { 811 if (sync_mode & SYNC_WAIT) {
812 if (error == EAGAIN) {
813 xfs_iunlock(ip, XFS_ILOCK_EXCL);
814 /* backoff longer than in xfs_ifree_cluster */
815 delay(2);
816 goto restart;
817 }
795 xfs_iflock(ip); 818 xfs_iflock(ip);
796 goto reclaim; 819 goto reclaim;
797 } 820 }
@@ -806,7 +829,7 @@ xfs_reclaim_inode(
806 * pass on the error. 829 * pass on the error.
807 */ 830 */
808 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { 831 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
809 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 832 xfs_warn(ip->i_mount,
810 "inode 0x%llx background reclaim flush failed with %d", 833 "inode 0x%llx background reclaim flush failed with %d",
811 (long long)ip->i_ino, error); 834 (long long)ip->i_ino, error);
812 } 835 }
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index ee3cee097e7e..ee2d2adaa438 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -37,7 +37,7 @@ xfs_stats_clear_proc_handler(
37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); 37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
38 38
39 if (!ret && write && *valp) { 39 if (!ret && write && *valp) {
40 printk("XFS Clearing xfsstats\n"); 40 xfs_notice(NULL, "Clearing xfsstats");
41 for_each_possible_cpu(c) { 41 for_each_possible_cpu(c) {
42 preempt_disable(); 42 preempt_disable();
43 /* save vn_active, it's a universal truth! */ 43 /* save vn_active, it's a universal truth! */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index d22aa3103106..7e2416478503 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -544,9 +544,10 @@ xfs_qm_dqtobp(
544 /* 544 /*
545 * A simple sanity check in case we got a corrupted dquot... 545 * A simple sanity check in case we got a corrupted dquot...
546 */ 546 */
547 if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, 547 error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
548 flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), 548 flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
549 "dqtobp")) { 549 "dqtobp");
550 if (error) {
550 if (!(flags & XFS_QMOPT_DQREPAIR)) { 551 if (!(flags & XFS_QMOPT_DQREPAIR)) {
551 xfs_trans_brelse(tp, bp); 552 xfs_trans_brelse(tp, bp);
552 return XFS_ERROR(EIO); 553 return XFS_ERROR(EIO);
@@ -827,7 +828,7 @@ xfs_qm_dqget(
827 if (xfs_do_dqerror) { 828 if (xfs_do_dqerror) {
828 if ((xfs_dqerror_target == mp->m_ddev_targp) && 829 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
829 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { 830 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
830 cmn_err(CE_DEBUG, "Returning error in dqget"); 831 xfs_debug(mp, "Returning error in dqget");
831 return (EIO); 832 return (EIO);
832 } 833 }
833 } 834 }
@@ -1207,8 +1208,9 @@ xfs_qm_dqflush(
1207 /* 1208 /*
1208 * A simple sanity check in case we got a corrupted dquot.. 1209 * A simple sanity check in case we got a corrupted dquot..
1209 */ 1210 */
1210 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 1211 error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
1211 XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { 1212 XFS_QMOPT_DOWARN, "dqflush (incore copy)");
1213 if (error) {
1212 xfs_buf_relse(bp); 1214 xfs_buf_relse(bp);
1213 xfs_dqfunlock(dqp); 1215 xfs_dqfunlock(dqp);
1214 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1216 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
@@ -1391,8 +1393,8 @@ xfs_qm_dqpurge(
1391 */ 1393 */
1392 error = xfs_qm_dqflush(dqp, SYNC_WAIT); 1394 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1393 if (error) 1395 if (error)
1394 xfs_fs_cmn_err(CE_WARN, mp, 1396 xfs_warn(mp, "%s: dquot %p flush failed",
1395 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1397 __func__, dqp);
1396 xfs_dqflock(dqp); 1398 xfs_dqflock(dqp);
1397 } 1399 }
1398 ASSERT(atomic_read(&dqp->q_pincount) == 0); 1400 ASSERT(atomic_read(&dqp->q_pincount) == 0);
@@ -1425,36 +1427,38 @@ xfs_qm_dqpurge(
1425void 1427void
1426xfs_qm_dqprint(xfs_dquot_t *dqp) 1428xfs_qm_dqprint(xfs_dquot_t *dqp)
1427{ 1429{
1428 cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); 1430 struct xfs_mount *mp = dqp->q_mount;
1429 cmn_err(CE_DEBUG, "---- dquotID = %d", 1431
1432 xfs_debug(mp, "-----------KERNEL DQUOT----------------");
1433 xfs_debug(mp, "---- dquotID = %d",
1430 (int)be32_to_cpu(dqp->q_core.d_id)); 1434 (int)be32_to_cpu(dqp->q_core.d_id));
1431 cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); 1435 xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
1432 cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); 1436 xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount);
1433 cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); 1437 xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno);
1434 cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); 1438 xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
1435 cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)", 1439 xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)",
1436 be64_to_cpu(dqp->q_core.d_blk_hardlimit), 1440 be64_to_cpu(dqp->q_core.d_blk_hardlimit),
1437 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); 1441 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
1438 cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)", 1442 xfs_debug(mp, "---- blkslimit = %Lu (0x%x)",
1439 be64_to_cpu(dqp->q_core.d_blk_softlimit), 1443 be64_to_cpu(dqp->q_core.d_blk_softlimit),
1440 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); 1444 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
1441 cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)", 1445 xfs_debug(mp, "---- inohlimit = %Lu (0x%x)",
1442 be64_to_cpu(dqp->q_core.d_ino_hardlimit), 1446 be64_to_cpu(dqp->q_core.d_ino_hardlimit),
1443 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); 1447 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
1444 cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)", 1448 xfs_debug(mp, "---- inoslimit = %Lu (0x%x)",
1445 be64_to_cpu(dqp->q_core.d_ino_softlimit), 1449 be64_to_cpu(dqp->q_core.d_ino_softlimit),
1446 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); 1450 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
1447 cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", 1451 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
1448 be64_to_cpu(dqp->q_core.d_bcount), 1452 be64_to_cpu(dqp->q_core.d_bcount),
1449 (int)be64_to_cpu(dqp->q_core.d_bcount)); 1453 (int)be64_to_cpu(dqp->q_core.d_bcount));
1450 cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", 1454 xfs_debug(mp, "---- icount = %Lu (0x%x)",
1451 be64_to_cpu(dqp->q_core.d_icount), 1455 be64_to_cpu(dqp->q_core.d_icount),
1452 (int)be64_to_cpu(dqp->q_core.d_icount)); 1456 (int)be64_to_cpu(dqp->q_core.d_icount));
1453 cmn_err(CE_DEBUG, "---- btimer = %d", 1457 xfs_debug(mp, "---- btimer = %d",
1454 (int)be32_to_cpu(dqp->q_core.d_btimer)); 1458 (int)be32_to_cpu(dqp->q_core.d_btimer));
1455 cmn_err(CE_DEBUG, "---- itimer = %d", 1459 xfs_debug(mp, "---- itimer = %d",
1456 (int)be32_to_cpu(dqp->q_core.d_itimer)); 1460 (int)be32_to_cpu(dqp->q_core.d_itimer));
1457 cmn_err(CE_DEBUG, "---------------------------"); 1461 xfs_debug(mp, "---------------------------");
1458} 1462}
1459#endif 1463#endif
1460 1464
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2a1f3dc10a02..9e0e2fa3f2c8 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -136,9 +136,8 @@ xfs_qm_dquot_logitem_push(
136 */ 136 */
137 error = xfs_qm_dqflush(dqp, 0); 137 error = xfs_qm_dqflush(dqp, 0);
138 if (error) 138 if (error)
139 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 139 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
140 "xfs_qm_dquot_logitem_push: push error %d on dqp %p", 140 __func__, error, dqp);
141 error, dqp);
142 xfs_dqunlock(dqp); 141 xfs_dqunlock(dqp);
143} 142}
144 143
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 206a2815ced6..254ee062bd7d 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -80,7 +80,7 @@ xfs_qm_dquot_list_print(
80 int i = 0; 80 int i = 0;
81 81
82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { 82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
83 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " 83 xfs_debug(mp, " %d. \"%d (%s)\" "
84 "bcnt = %lld, icnt = %lld, refs = %d", 84 "bcnt = %lld, icnt = %lld, refs = %d",
85 i++, be32_to_cpu(dqp->q_core.d_id), 85 i++, be32_to_cpu(dqp->q_core.d_id),
86 DQFLAGTO_TYPESTR(dqp), 86 DQFLAGTO_TYPESTR(dqp),
@@ -205,7 +205,7 @@ xfs_qm_destroy(
205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { 205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
206 xfs_dqlock(dqp); 206 xfs_dqlock(dqp);
207#ifdef QUOTADEBUG 207#ifdef QUOTADEBUG
208 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); 208 xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
209#endif 209#endif
210 list_del_init(&dqp->q_freelist); 210 list_del_init(&dqp->q_freelist);
211 xfs_Gqm->qm_dqfrlist_cnt--; 211 xfs_Gqm->qm_dqfrlist_cnt--;
@@ -341,9 +341,7 @@ xfs_qm_mount_quotas(
341 * quotas immediately. 341 * quotas immediately.
342 */ 342 */
343 if (mp->m_sb.sb_rextents) { 343 if (mp->m_sb.sb_rextents) {
344 cmn_err(CE_NOTE, 344 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
345 "Cannot turn on quotas for realtime filesystem %s",
346 mp->m_fsname);
347 mp->m_qflags = 0; 345 mp->m_qflags = 0;
348 goto write_changes; 346 goto write_changes;
349 } 347 }
@@ -402,14 +400,13 @@ xfs_qm_mount_quotas(
402 * off, but the on disk superblock doesn't know that ! 400 * off, but the on disk superblock doesn't know that !
403 */ 401 */
404 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); 402 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
405 xfs_fs_cmn_err(CE_ALERT, mp, 403 xfs_alert(mp, "%s: Superblock update failed!",
406 "XFS mount_quotas: Superblock update failed!"); 404 __func__);
407 } 405 }
408 } 406 }
409 407
410 if (error) { 408 if (error) {
411 xfs_fs_cmn_err(CE_WARN, mp, 409 xfs_warn(mp, "Failed to initialize disk quotas.");
412 "Failed to initialize disk quotas.");
413 return; 410 return;
414 } 411 }
415 412
@@ -1230,13 +1227,6 @@ xfs_qm_qino_alloc(
1230 } 1227 }
1231 1228
1232 /* 1229 /*
1233 * Keep an extra reference to this quota inode. This inode is
1234 * locked exclusively and joined to the transaction already.
1235 */
1236 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1237 IHOLD(*ip);
1238
1239 /*
1240 * Make the changes in the superblock, and log those too. 1230 * Make the changes in the superblock, and log those too.
1241 * sbfields arg may contain fields other than *QUOTINO; 1231 * sbfields arg may contain fields other than *QUOTINO;
1242 * VERSIONNUM for example. 1232 * VERSIONNUM for example.
@@ -1264,7 +1254,7 @@ xfs_qm_qino_alloc(
1264 xfs_mod_sb(tp, sbfields); 1254 xfs_mod_sb(tp, sbfields);
1265 1255
1266 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { 1256 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1267 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); 1257 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
1268 return error; 1258 return error;
1269 } 1259 }
1270 return 0; 1260 return 0;
@@ -1299,7 +1289,7 @@ xfs_qm_reset_dqcounts(
1299 * output any warnings because it's perfectly possible to 1289 * output any warnings because it's perfectly possible to
1300 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. 1290 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1301 */ 1291 */
1302 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, 1292 (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1303 "xfs_quotacheck"); 1293 "xfs_quotacheck");
1304 ddq->d_bcount = 0; 1294 ddq->d_bcount = 0;
1305 ddq->d_icount = 0; 1295 ddq->d_icount = 0;
@@ -1676,7 +1666,7 @@ xfs_qm_quotacheck(
1676 */ 1666 */
1677 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); 1667 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1678 1668
1679 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); 1669 xfs_notice(mp, "Quotacheck needed: Please wait.");
1680 1670
1681 /* 1671 /*
1682 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset 1672 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
@@ -1754,9 +1744,9 @@ xfs_qm_quotacheck(
1754 1744
1755 error_return: 1745 error_return:
1756 if (error) { 1746 if (error) {
1757 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): " 1747 xfs_warn(mp,
1758 "Disabling quotas.", 1748 "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1759 mp->m_fsname, error); 1749 error);
1760 /* 1750 /*
1761 * We must turn off quotas. 1751 * We must turn off quotas.
1762 */ 1752 */
@@ -1764,12 +1754,11 @@ xfs_qm_quotacheck(
1764 ASSERT(xfs_Gqm != NULL); 1754 ASSERT(xfs_Gqm != NULL);
1765 xfs_qm_destroy_quotainfo(mp); 1755 xfs_qm_destroy_quotainfo(mp);
1766 if (xfs_mount_reset_sbqflags(mp)) { 1756 if (xfs_mount_reset_sbqflags(mp)) {
1767 cmn_err(CE_WARN, "XFS quotacheck %s: " 1757 xfs_warn(mp,
1768 "Failed to reset quota flags.", mp->m_fsname); 1758 "Quotacheck: Failed to reset quota flags.");
1769 } 1759 }
1770 } else { 1760 } else
1771 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); 1761 xfs_notice(mp, "Quotacheck: Done.");
1772 }
1773 return (error); 1762 return (error);
1774} 1763}
1775 1764
@@ -1937,8 +1926,8 @@ again:
1937 */ 1926 */
1938 error = xfs_qm_dqflush(dqp, 0); 1927 error = xfs_qm_dqflush(dqp, 0);
1939 if (error) { 1928 if (error) {
1940 xfs_fs_cmn_err(CE_WARN, mp, 1929 xfs_warn(mp, "%s: dquot %p flush failed",
1941 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 1930 __func__, dqp);
1942 } 1931 }
1943 goto dqunlock; 1932 goto dqunlock;
1944 } 1933 }
@@ -2115,7 +2104,7 @@ xfs_qm_write_sb_changes(
2115 int error; 2104 int error;
2116 2105
2117#ifdef QUOTADEBUG 2106#ifdef QUOTADEBUG
2118 cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname); 2107 xfs_notice(mp, "Writing superblock quota changes");
2119#endif 2108#endif
2120 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 2109 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2121 if ((error = xfs_trans_reserve(tp, 0, 2110 if ((error = xfs_trans_reserve(tp, 0,
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 45b5cb1788ab..774d7ec6df8e 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -119,8 +119,7 @@ xfs_qm_newmount(
119 (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || 119 (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
120 (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && 120 (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) &&
121 xfs_dev_is_read_only(mp, "changing quota state")) { 121 xfs_dev_is_read_only(mp, "changing quota state")) {
122 cmn_err(CE_WARN, 122 xfs_warn(mp, "please mount with%s%s%s%s.",
123 "XFS: please mount with%s%s%s%s.",
124 (!quotaondisk ? "out quota" : ""), 123 (!quotaondisk ? "out quota" : ""),
125 (uquotaondisk ? " usrquota" : ""), 124 (uquotaondisk ? " usrquota" : ""),
126 (pquotaondisk ? " prjquota" : ""), 125 (pquotaondisk ? " prjquota" : ""),
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index bdebc183223e..c82f06778a27 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -41,12 +41,6 @@
41#include "xfs_qm.h" 41#include "xfs_qm.h"
42#include "xfs_trace.h" 42#include "xfs_trace.h"
43 43
44#ifdef DEBUG
45# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
46#else
47# define qdprintk(s, args...) do { } while (0)
48#endif
49
50STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 44STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
51STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, 45STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
52 uint); 46 uint);
@@ -294,7 +288,8 @@ xfs_qm_scall_trunc_qfiles(
294 int error = 0, error2 = 0; 288 int error = 0, error2 = 0;
295 289
296 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 290 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
297 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 291 xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
292 __func__, flags, mp->m_qflags);
298 return XFS_ERROR(EINVAL); 293 return XFS_ERROR(EINVAL);
299 } 294 }
300 295
@@ -331,7 +326,8 @@ xfs_qm_scall_quotaon(
331 sbflags = 0; 326 sbflags = 0;
332 327
333 if (flags == 0) { 328 if (flags == 0) {
334 qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags); 329 xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
330 __func__, mp->m_qflags);
335 return XFS_ERROR(EINVAL); 331 return XFS_ERROR(EINVAL);
336 } 332 }
337 333
@@ -352,8 +348,9 @@ xfs_qm_scall_quotaon(
352 (flags & XFS_GQUOTA_ACCT) == 0 && 348 (flags & XFS_GQUOTA_ACCT) == 0 &&
353 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && 349 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
354 (flags & XFS_OQUOTA_ENFD))) { 350 (flags & XFS_OQUOTA_ENFD))) {
355 qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", 351 xfs_debug(mp,
356 flags, mp->m_sb.sb_qflags); 352 "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
353 __func__, flags, mp->m_sb.sb_qflags);
357 return XFS_ERROR(EINVAL); 354 return XFS_ERROR(EINVAL);
358 } 355 }
359 /* 356 /*
@@ -541,7 +538,7 @@ xfs_qm_scall_setqlim(
541 q->qi_bsoftlimit = soft; 538 q->qi_bsoftlimit = soft;
542 } 539 }
543 } else { 540 } else {
544 qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); 541 xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
545 } 542 }
546 hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? 543 hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
547 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : 544 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
@@ -557,7 +554,7 @@ xfs_qm_scall_setqlim(
557 q->qi_rtbsoftlimit = soft; 554 q->qi_rtbsoftlimit = soft;
558 } 555 }
559 } else { 556 } else {
560 qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); 557 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
561 } 558 }
562 559
563 hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? 560 hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
@@ -574,7 +571,7 @@ xfs_qm_scall_setqlim(
574 q->qi_isoftlimit = soft; 571 q->qi_isoftlimit = soft;
575 } 572 }
576 } else { 573 } else {
577 qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); 574 xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
578 } 575 }
579 576
580 /* 577 /*
@@ -939,10 +936,11 @@ struct mutex qcheck_lock;
939#define DQTEST_LIST_PRINT(l, NXT, title) \ 936#define DQTEST_LIST_PRINT(l, NXT, title) \
940{ \ 937{ \
941 xfs_dqtest_t *dqp; int i = 0;\ 938 xfs_dqtest_t *dqp; int i = 0;\
942 cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ 939 xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
943 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ 940 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
944 dqp = (xfs_dqtest_t *)dqp->NXT) { \ 941 dqp = (xfs_dqtest_t *)dqp->NXT) { \
945 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ 942 xfs_debug(dqp->q_mount, \
943 " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
946 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ 944 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
947 dqp->d_bcount, dqp->d_icount); } \ 945 dqp->d_bcount, dqp->d_icount); } \
948} 946}
@@ -966,16 +964,17 @@ xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
966} 964}
967STATIC void 965STATIC void
968xfs_qm_dqtest_print( 966xfs_qm_dqtest_print(
969 xfs_dqtest_t *d) 967 struct xfs_mount *mp,
968 struct dqtest *d)
970{ 969{
971 cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); 970 xfs_debug(mp, "-----------DQTEST DQUOT----------------");
972 cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); 971 xfs_debug(mp, "---- dquot ID = %d", d->d_id);
973 cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); 972 xfs_debug(mp, "---- fs = 0x%p", d->q_mount);
974 cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", 973 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
975 d->d_bcount, (int)d->d_bcount); 974 d->d_bcount, (int)d->d_bcount);
976 cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", 975 xfs_debug(mp, "---- icount = %Lu (0x%x)",
977 d->d_icount, (int)d->d_icount); 976 d->d_icount, (int)d->d_icount);
978 cmn_err(CE_DEBUG, "---------------------------"); 977 xfs_debug(mp, "---------------------------");
979} 978}
980 979
981STATIC void 980STATIC void
@@ -989,12 +988,14 @@ xfs_qm_dqtest_failed(
989{ 988{
990 qmtest_nfails++; 989 qmtest_nfails++;
991 if (error) 990 if (error)
992 cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s", 991 xfs_debug(dqp->q_mount,
993 d->d_id, error, reason); 992 "quotacheck failed id=%d, err=%d\nreason: %s",
993 d->d_id, error, reason);
994 else 994 else
995 cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]", 995 xfs_debug(dqp->q_mount,
996 d->d_id, reason, (int)a, (int)b); 996 "quotacheck failed id=%d (%s) [%d != %d]",
997 xfs_qm_dqtest_print(d); 997 d->d_id, reason, (int)a, (int)b);
998 xfs_qm_dqtest_print(dqp->q_mount, d);
998 if (dqp) 999 if (dqp)
999 xfs_qm_dqprint(dqp); 1000 xfs_qm_dqprint(dqp);
1000} 1001}
@@ -1021,9 +1022,9 @@ xfs_dqtest_cmp2(
1021 be64_to_cpu(dqp->q_core.d_bcount) >= 1022 be64_to_cpu(dqp->q_core.d_bcount) >=
1022 be64_to_cpu(dqp->q_core.d_blk_softlimit)) { 1023 be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
1023 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { 1024 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
1024 cmn_err(CE_DEBUG, 1025 xfs_debug(dqp->q_mount,
1025 "%d [%s] [0x%p] BLK TIMER NOT STARTED", 1026 "%d [%s] BLK TIMER NOT STARTED",
1026 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1027 d->d_id, DQFLAGTO_TYPESTR(d));
1027 err++; 1028 err++;
1028 } 1029 }
1029 } 1030 }
@@ -1031,16 +1032,16 @@ xfs_dqtest_cmp2(
1031 be64_to_cpu(dqp->q_core.d_icount) >= 1032 be64_to_cpu(dqp->q_core.d_icount) >=
1032 be64_to_cpu(dqp->q_core.d_ino_softlimit)) { 1033 be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
1033 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { 1034 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
1034 cmn_err(CE_DEBUG, 1035 xfs_debug(dqp->q_mount,
1035 "%d [%s] [0x%p] INO TIMER NOT STARTED", 1036 "%d [%s] INO TIMER NOT STARTED",
1036 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1037 d->d_id, DQFLAGTO_TYPESTR(d));
1037 err++; 1038 err++;
1038 } 1039 }
1039 } 1040 }
1040#ifdef QUOTADEBUG 1041#ifdef QUOTADEBUG
1041 if (!err) { 1042 if (!err) {
1042 cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", 1043 xfs_debug(dqp->q_mount, "%d [%s] qchecked",
1043 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1044 d->d_id, DQFLAGTO_TYPESTR(d));
1044 } 1045 }
1045#endif 1046#endif
1046 return (err); 1047 return (err);
@@ -1137,8 +1138,8 @@ xfs_qm_internalqcheck_adjust(
1137 1138
1138 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { 1139 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1139 *res = BULKSTAT_RV_NOTHING; 1140 *res = BULKSTAT_RV_NOTHING;
1140 qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n", 1141 xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
1141 (unsigned long long) ino, 1142 __func__, (unsigned long long) ino,
1142 (unsigned long long) mp->m_sb.sb_uquotino, 1143 (unsigned long long) mp->m_sb.sb_uquotino,
1143 (unsigned long long) mp->m_sb.sb_gquotino); 1144 (unsigned long long) mp->m_sb.sb_gquotino);
1144 return XFS_ERROR(EINVAL); 1145 return XFS_ERROR(EINVAL);
@@ -1223,12 +1224,12 @@ xfs_qm_internalqcheck(
1223 xfs_qm_internalqcheck_adjust, 1224 xfs_qm_internalqcheck_adjust,
1224 0, NULL, &done); 1225 0, NULL, &done);
1225 if (error) { 1226 if (error) {
1226 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); 1227 xfs_debug(mp, "Bulkstat returned error 0x%x", error);
1227 break; 1228 break;
1228 } 1229 }
1229 } while (!done); 1230 } while (!done);
1230 1231
1231 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1232 xfs_debug(mp, "Checking results against system dquots");
1232 for (i = 0; i < qmtest_hashmask; i++) { 1233 for (i = 0; i < qmtest_hashmask; i++) {
1233 xfs_dqtest_t *d, *n; 1234 xfs_dqtest_t *d, *n;
1234 xfs_dqhash_t *h; 1235 xfs_dqhash_t *h;
@@ -1246,10 +1247,10 @@ xfs_qm_internalqcheck(
1246 } 1247 }
1247 1248
1248 if (qmtest_nfails) { 1249 if (qmtest_nfails) {
1249 cmn_err(CE_DEBUG, "******** quotacheck failed ********"); 1250 xfs_debug(mp, "******** quotacheck failed ********");
1250 cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails); 1251 xfs_debug(mp, "failures = %d", qmtest_nfails);
1251 } else { 1252 } else {
1252 cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); 1253 xfs_debug(mp, "******** quotacheck successful! ********");
1253 } 1254 }
1254 kmem_free(qmtest_udqtab); 1255 kmem_free(qmtest_udqtab);
1255 kmem_free(qmtest_gdqtab); 1256 kmem_free(qmtest_gdqtab);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 7de91d1b75c0..2a3648731331 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -643,8 +643,9 @@ xfs_trans_dqresv(
643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && 643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { 644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
645#ifdef QUOTADEBUG 645#ifdef QUOTADEBUG
646 cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" 646 xfs_debug(mp,
647 " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); 647 "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
648 nblks, *resbcountp, hardlimit);
648#endif 649#endif
649 if (nblks > 0) { 650 if (nblks > 0) {
650 /* 651 /*
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
deleted file mode 100644
index 0df88897ef84..000000000000
--- a/fs/xfs/support/debug.c
+++ /dev/null
@@ -1,107 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <xfs.h>
19#include "debug.h"
20
21/* xfs_mount.h drags a lot of crap in, sorry.. */
22#include "xfs_sb.h"
23#include "xfs_inum.h"
24#include "xfs_ag.h"
25#include "xfs_mount.h"
26#include "xfs_error.h"
27
28void
29cmn_err(
30 const char *lvl,
31 const char *fmt,
32 ...)
33{
34 struct va_format vaf;
35 va_list args;
36
37 va_start(args, fmt);
38 vaf.fmt = fmt;
39 vaf.va = &args;
40
41 printk("%s%pV", lvl, &vaf);
42 va_end(args);
43
44 BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
45}
46
47void
48xfs_fs_cmn_err(
49 const char *lvl,
50 struct xfs_mount *mp,
51 const char *fmt,
52 ...)
53{
54 struct va_format vaf;
55 va_list args;
56
57 va_start(args, fmt);
58 vaf.fmt = fmt;
59 vaf.va = &args;
60
61 printk("%sFilesystem %s: %pV", lvl, mp->m_fsname, &vaf);
62 va_end(args);
63
64 BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
65}
66
67/* All callers to xfs_cmn_err use CE_ALERT, so don't bother testing lvl */
68void
69xfs_cmn_err(
70 int panic_tag,
71 const char *lvl,
72 struct xfs_mount *mp,
73 const char *fmt,
74 ...)
75{
76 struct va_format vaf;
77 va_list args;
78 int do_panic = 0;
79
80 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
81 printk(KERN_ALERT "XFS: Transforming an alert into a BUG.");
82 do_panic = 1;
83 }
84
85 va_start(args, fmt);
86 vaf.fmt = fmt;
87 vaf.va = &args;
88
89 printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf);
90 va_end(args);
91
92 BUG_ON(do_panic);
93}
94
95void
96assfail(char *expr, char *file, int line)
97{
98 printk(KERN_CRIT "Assertion failed: %s, file: %s, line: %d\n", expr,
99 file, line);
100 BUG();
101}
102
103void
104xfs_hex_dump(void *p, int length)
105{
106 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
107}
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
deleted file mode 100644
index 05699f67d475..000000000000
--- a/fs/xfs/support/debug.h
+++ /dev/null
@@ -1,61 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_DEBUG_H__
19#define __XFS_SUPPORT_DEBUG_H__
20
21#include <stdarg.h>
22
23struct xfs_mount;
24
25#define CE_DEBUG KERN_DEBUG
26#define CE_CONT KERN_INFO
27#define CE_NOTE KERN_NOTICE
28#define CE_WARN KERN_WARNING
29#define CE_ALERT KERN_ALERT
30#define CE_PANIC KERN_EMERG
31
32void cmn_err(const char *lvl, const char *fmt, ...)
33 __attribute__ ((format (printf, 2, 3)));
34void xfs_fs_cmn_err( const char *lvl, struct xfs_mount *mp,
35 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
36void xfs_cmn_err( int panic_tag, const char *lvl, struct xfs_mount *mp,
37 const char *fmt, ...) __attribute__ ((format (printf, 4, 5)));
38
39extern void assfail(char *expr, char *f, int l);
40
41#define ASSERT_ALWAYS(expr) \
42 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
43
44#ifndef DEBUG
45#define ASSERT(expr) ((void)0)
46
47#ifndef STATIC
48# define STATIC static noinline
49#endif
50
51#else /* DEBUG */
52
53#define ASSERT(expr) \
54 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
55
56#ifndef STATIC
57# define STATIC noinline
58#endif
59
60#endif /* DEBUG */
61#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f3227984a9bf..4bc3c649aee4 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -147,10 +147,9 @@ xfs_alloc_get_rec(
147 */ 147 */
148STATIC void 148STATIC void
149xfs_alloc_compute_aligned( 149xfs_alloc_compute_aligned(
150 xfs_alloc_arg_t *args, /* allocation argument structure */
150 xfs_agblock_t foundbno, /* starting block in found extent */ 151 xfs_agblock_t foundbno, /* starting block in found extent */
151 xfs_extlen_t foundlen, /* length in found extent */ 152 xfs_extlen_t foundlen, /* length in found extent */
152 xfs_extlen_t alignment, /* alignment for allocation */
153 xfs_extlen_t minlen, /* minimum length for allocation */
154 xfs_agblock_t *resbno, /* result block number */ 153 xfs_agblock_t *resbno, /* result block number */
155 xfs_extlen_t *reslen) /* result length */ 154 xfs_extlen_t *reslen) /* result length */
156{ 155{
@@ -158,8 +157,8 @@ xfs_alloc_compute_aligned(
158 xfs_extlen_t diff; 157 xfs_extlen_t diff;
159 xfs_extlen_t len; 158 xfs_extlen_t len;
160 159
161 if (alignment > 1 && foundlen >= minlen) { 160 if (args->alignment > 1 && foundlen >= args->minlen) {
162 bno = roundup(foundbno, alignment); 161 bno = roundup(foundbno, args->alignment);
163 diff = bno - foundbno; 162 diff = bno - foundbno;
164 len = diff >= foundlen ? 0 : foundlen - diff; 163 len = diff >= foundlen ? 0 : foundlen - diff;
165 } else { 164 } else {
@@ -464,6 +463,27 @@ xfs_alloc_read_agfl(
464 return 0; 463 return 0;
465} 464}
466 465
466STATIC int
467xfs_alloc_update_counters(
468 struct xfs_trans *tp,
469 struct xfs_perag *pag,
470 struct xfs_buf *agbp,
471 long len)
472{
473 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
474
475 pag->pagf_freeblks += len;
476 be32_add_cpu(&agf->agf_freeblks, len);
477
478 xfs_trans_agblocks_delta(tp, len);
479 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
480 be32_to_cpu(agf->agf_length)))
481 return EFSCORRUPTED;
482
483 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
484 return 0;
485}
486
467/* 487/*
468 * Allocation group level functions. 488 * Allocation group level functions.
469 */ 489 */
@@ -505,49 +525,44 @@ xfs_alloc_ag_vextent(
505 ASSERT(0); 525 ASSERT(0);
506 /* NOTREACHED */ 526 /* NOTREACHED */
507 } 527 }
508 if (error) 528
529 if (error || args->agbno == NULLAGBLOCK)
509 return error; 530 return error;
510 /*
511 * If the allocation worked, need to change the agf structure
512 * (and log it), and the superblock.
513 */
514 if (args->agbno != NULLAGBLOCK) {
515 xfs_agf_t *agf; /* allocation group freelist header */
516 long slen = (long)args->len;
517 531
518 ASSERT(args->len >= args->minlen && args->len <= args->maxlen); 532 ASSERT(args->len >= args->minlen);
519 ASSERT(!(args->wasfromfl) || !args->isfl); 533 ASSERT(args->len <= args->maxlen);
520 ASSERT(args->agbno % args->alignment == 0); 534 ASSERT(!args->wasfromfl || !args->isfl);
521 if (!(args->wasfromfl)) { 535 ASSERT(args->agbno % args->alignment == 0);
522 536
523 agf = XFS_BUF_TO_AGF(args->agbp); 537 if (!args->wasfromfl) {
524 be32_add_cpu(&agf->agf_freeblks, -(args->len)); 538 error = xfs_alloc_update_counters(args->tp, args->pag,
525 xfs_trans_agblocks_delta(args->tp, 539 args->agbp,
526 -((long)(args->len))); 540 -((long)(args->len)));
527 args->pag->pagf_freeblks -= args->len; 541 if (error)
528 ASSERT(be32_to_cpu(agf->agf_freeblks) <= 542 return error;
529 be32_to_cpu(agf->agf_length)); 543
530 xfs_alloc_log_agf(args->tp, args->agbp, 544 /*
531 XFS_AGF_FREEBLKS); 545 * Search the busylist for these blocks and mark the
532 /* 546 * transaction as synchronous if blocks are found. This
533 * Search the busylist for these blocks and mark the 547 * avoids the need to block due to a synchronous log
534 * transaction as synchronous if blocks are found. This 548 * force to ensure correct ordering as the synchronous
535 * avoids the need to block due to a synchronous log 549 * transaction will guarantee that for us.
536 * force to ensure correct ordering as the synchronous 550 */
537 * transaction will guarantee that for us. 551 if (xfs_alloc_busy_search(args->mp, args->agno,
538 */ 552 args->agbno, args->len))
539 if (xfs_alloc_busy_search(args->mp, args->agno, 553 xfs_trans_set_sync(args->tp);
540 args->agbno, args->len))
541 xfs_trans_set_sync(args->tp);
542 }
543 if (!args->isfl)
544 xfs_trans_mod_sb(args->tp,
545 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
546 XFS_TRANS_SB_FDBLOCKS, -slen);
547 XFS_STATS_INC(xs_allocx);
548 XFS_STATS_ADD(xs_allocb, args->len);
549 } 554 }
550 return 0; 555
556 if (!args->isfl) {
557 xfs_trans_mod_sb(args->tp, args->wasdel ?
558 XFS_TRANS_SB_RES_FDBLOCKS :
559 XFS_TRANS_SB_FDBLOCKS,
560 -((long)(args->len)));
561 }
562
563 XFS_STATS_INC(xs_allocx);
564 XFS_STATS_ADD(xs_allocb, args->len);
565 return error;
551} 566}
552 567
553/* 568/*
@@ -693,8 +708,7 @@ xfs_alloc_find_best_extent(
693 if (error) 708 if (error)
694 goto error0; 709 goto error0;
695 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
696 xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, 711 xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
697 args->minlen, &bno, slena);
698 712
699 /* 713 /*
700 * The good extent is closer than this one. 714 * The good extent is closer than this one.
@@ -866,8 +880,8 @@ xfs_alloc_ag_vextent_near(
866 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 880 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
867 goto error0; 881 goto error0;
868 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 882 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
869 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 883 xfs_alloc_compute_aligned(args, ltbno, ltlen,
870 args->minlen, &ltbnoa, &ltlena); 884 &ltbnoa, &ltlena);
871 if (ltlena < args->minlen) 885 if (ltlena < args->minlen)
872 continue; 886 continue;
873 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 887 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
@@ -987,8 +1001,8 @@ xfs_alloc_ag_vextent_near(
987 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) 1001 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
988 goto error0; 1002 goto error0;
989 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1003 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
990 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 1004 xfs_alloc_compute_aligned(args, ltbno, ltlen,
991 args->minlen, &ltbnoa, &ltlena); 1005 &ltbnoa, &ltlena);
992 if (ltlena >= args->minlen) 1006 if (ltlena >= args->minlen)
993 break; 1007 break;
994 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1008 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1003,8 +1017,8 @@ xfs_alloc_ag_vextent_near(
1003 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) 1017 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1004 goto error0; 1018 goto error0;
1005 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1019 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1006 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, 1020 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1007 args->minlen, &gtbnoa, &gtlena); 1021 &gtbnoa, &gtlena);
1008 if (gtlena >= args->minlen) 1022 if (gtlena >= args->minlen)
1009 break; 1023 break;
1010 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1024 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1183,8 +1197,7 @@ xfs_alloc_ag_vextent_size(
1183 * once aligned; if not, we search left for something better. 1197 * once aligned; if not, we search left for something better.
1184 * This can't happen in the second case above. 1198 * This can't happen in the second case above.
1185 */ 1199 */
1186 xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, 1200 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1187 &rbno, &rlen);
1188 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1201 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1189 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1202 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1190 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1203 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1209,8 +1222,8 @@ xfs_alloc_ag_vextent_size(
1209 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1222 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1210 if (flen < bestrlen) 1223 if (flen < bestrlen)
1211 break; 1224 break;
1212 xfs_alloc_compute_aligned(fbno, flen, args->alignment, 1225 xfs_alloc_compute_aligned(args, fbno, flen,
1213 args->minlen, &rbno, &rlen); 1226 &rbno, &rlen);
1214 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1227 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1215 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1228 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1216 (rlen <= flen && rbno + rlen <= fbno + flen), 1229 (rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1388,6 +1401,7 @@ xfs_free_ag_extent(
1388 xfs_mount_t *mp; /* mount point struct for filesystem */ 1401 xfs_mount_t *mp; /* mount point struct for filesystem */
1389 xfs_agblock_t nbno; /* new starting block of freespace */ 1402 xfs_agblock_t nbno; /* new starting block of freespace */
1390 xfs_extlen_t nlen; /* new length of freespace */ 1403 xfs_extlen_t nlen; /* new length of freespace */
1404 xfs_perag_t *pag; /* per allocation group data */
1391 1405
1392 mp = tp->t_mountp; 1406 mp = tp->t_mountp;
1393 /* 1407 /*
@@ -1586,30 +1600,20 @@ xfs_free_ag_extent(
1586 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1600 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1587 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1601 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1588 cnt_cur = NULL; 1602 cnt_cur = NULL;
1603
1589 /* 1604 /*
1590 * Update the freespace totals in the ag and superblock. 1605 * Update the freespace totals in the ag and superblock.
1591 */ 1606 */
1592 { 1607 pag = xfs_perag_get(mp, agno);
1593 xfs_agf_t *agf; 1608 error = xfs_alloc_update_counters(tp, pag, agbp, len);
1594 xfs_perag_t *pag; /* per allocation group data */ 1609 xfs_perag_put(pag);
1595 1610 if (error)
1596 pag = xfs_perag_get(mp, agno); 1611 goto error0;
1597 pag->pagf_freeblks += len; 1612
1598 xfs_perag_put(pag); 1613 if (!isfl)
1599 1614 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1600 agf = XFS_BUF_TO_AGF(agbp); 1615 XFS_STATS_INC(xs_freex);
1601 be32_add_cpu(&agf->agf_freeblks, len); 1616 XFS_STATS_ADD(xs_freeb, len);
1602 xfs_trans_agblocks_delta(tp, len);
1603 XFS_WANT_CORRUPTED_GOTO(
1604 be32_to_cpu(agf->agf_freeblks) <=
1605 be32_to_cpu(agf->agf_length),
1606 error0);
1607 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1608 if (!isfl)
1609 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1610 XFS_STATS_INC(xs_freex);
1611 XFS_STATS_ADD(xs_freeb, len);
1612 }
1613 1617
1614 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1618 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1615 1619
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index dc3afd7739ff..fa00788de2f5 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2365,6 +2365,13 @@ xfs_bmap_rtalloc(
2365 */ 2365 */
2366 if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) 2366 if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
2367 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; 2367 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
2368
2369 /*
2370 * Lock out other modifications to the RT bitmap inode.
2371 */
2372 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2373 xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2374
2368 /* 2375 /*
2369 * If it's an allocation to an empty file at offset 0, 2376 * If it's an allocation to an empty file at offset 0,
2370 * pick an extent that will space things out in the rt area. 2377 * pick an extent that will space things out in the rt area.
@@ -3519,7 +3526,7 @@ xfs_bmap_search_extents(
3519 3526
3520 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && 3527 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
3521 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { 3528 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
3522 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, 3529 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
3523 "Access to block zero in inode %llu " 3530 "Access to block zero in inode %llu "
3524 "start_block: %llx start_off: %llx " 3531 "start_block: %llx start_off: %llx "
3525 "blkcnt: %llx extent-state: %x lastx: %x\n", 3532 "blkcnt: %llx extent-state: %x lastx: %x\n",
@@ -4193,12 +4200,11 @@ xfs_bmap_read_extents(
4193 num_recs = xfs_btree_get_numrecs(block); 4200 num_recs = xfs_btree_get_numrecs(block);
4194 if (unlikely(i + num_recs > room)) { 4201 if (unlikely(i + num_recs > room)) {
4195 ASSERT(i + num_recs <= room); 4202 ASSERT(i + num_recs <= room);
4196 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 4203 xfs_warn(ip->i_mount,
4197 "corrupt dinode %Lu, (btree extents).", 4204 "corrupt dinode %Lu, (btree extents).",
4198 (unsigned long long) ip->i_ino); 4205 (unsigned long long) ip->i_ino);
4199 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", 4206 XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
4200 XFS_ERRLEVEL_LOW, 4207 XFS_ERRLEVEL_LOW, ip->i_mount, block);
4201 ip->i_mount);
4202 goto error0; 4208 goto error0;
4203 } 4209 }
4204 XFS_WANT_CORRUPTED_GOTO( 4210 XFS_WANT_CORRUPTED_GOTO(
@@ -5772,7 +5778,7 @@ xfs_check_block(
5772 else 5778 else
5773 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); 5779 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
5774 if (*thispa == *pp) { 5780 if (*thispa == *pp) {
5775 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 5781 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
5776 __func__, j, i, 5782 __func__, j, i,
5777 (unsigned long long)be64_to_cpu(*thispa)); 5783 (unsigned long long)be64_to_cpu(*thispa));
5778 panic("%s: ptrs are equal in node\n", 5784 panic("%s: ptrs are equal in node\n",
@@ -5937,11 +5943,11 @@ xfs_bmap_check_leaf_extents(
5937 return; 5943 return;
5938 5944
5939error0: 5945error0:
5940 cmn_err(CE_WARN, "%s: at error0", __func__); 5946 xfs_warn(mp, "%s: at error0", __func__);
5941 if (bp_release) 5947 if (bp_release)
5942 xfs_trans_brelse(NULL, bp); 5948 xfs_trans_brelse(NULL, bp);
5943error_norelse: 5949error_norelse:
5944 cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", 5950 xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
5945 __func__, i); 5951 __func__, i);
5946 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); 5952 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
5947 return; 5953 return;
@@ -6144,7 +6150,7 @@ xfs_bmap_punch_delalloc_range(
6144 if (error) { 6150 if (error) {
6145 /* something screwed, just bail */ 6151 /* something screwed, just bail */
6146 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 6152 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
6147 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 6153 xfs_alert(ip->i_mount,
6148 "Failed delalloc mapping lookup ino %lld fsb %lld.", 6154 "Failed delalloc mapping lookup ino %lld fsb %lld.",
6149 ip->i_ino, start_fsb); 6155 ip->i_ino, start_fsb);
6150 } 6156 }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6f8c21ce0d6d..e5413d96f1af 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -130,10 +130,12 @@ xfs_buf_item_log_check(
130 orig = bip->bli_orig; 130 orig = bip->bli_orig;
131 buffer = XFS_BUF_PTR(bp); 131 buffer = XFS_BUF_PTR(bp);
132 for (x = 0; x < XFS_BUF_COUNT(bp); x++) { 132 for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
133 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) 133 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
134 cmn_err(CE_PANIC, 134 xfs_emerg(bp->b_mount,
135 "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", 135 "%s: bip %x buffer %x orig %x index %d",
136 bip, bp, orig, x); 136 __func__, bip, bp, orig, x);
137 ASSERT(0);
138 }
137 } 139 }
138} 140}
139#else 141#else
@@ -983,10 +985,9 @@ xfs_buf_iodone_callbacks(
983 if (XFS_BUF_TARGET(bp) != lasttarg || 985 if (XFS_BUF_TARGET(bp) != lasttarg ||
984 time_after(jiffies, (lasttime + 5*HZ))) { 986 time_after(jiffies, (lasttime + 5*HZ))) {
985 lasttime = jiffies; 987 lasttime = jiffies;
986 cmn_err(CE_ALERT, "Device %s, XFS metadata write error" 988 xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
987 " block 0x%llx in %s",
988 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), 989 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
989 (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); 990 (__uint64_t)XFS_BUF_ADDR(bp));
990 } 991 }
991 lasttarg = XFS_BUF_TARGET(bp); 992 lasttarg = XFS_BUF_TARGET(bp);
992 993
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 1c00bedb3175..6102ac6d1dff 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1995,13 +1995,12 @@ xfs_da_do_buf(
1995 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); 1995 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
1996 if (unlikely(error == EFSCORRUPTED)) { 1996 if (unlikely(error == EFSCORRUPTED)) {
1997 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 1997 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
1998 cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", 1998 xfs_alert(mp, "%s: bno %lld dir: inode %lld",
1999 (long long)bno); 1999 __func__, (long long)bno,
2000 cmn_err(CE_ALERT, "dir: inode %lld\n",
2001 (long long)dp->i_ino); 2000 (long long)dp->i_ino);
2002 for (i = 0; i < nmap; i++) { 2001 for (i = 0; i < nmap; i++) {
2003 cmn_err(CE_ALERT, 2002 xfs_alert(mp,
2004 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n", 2003"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2005 i, 2004 i,
2006 (long long)mapp[i].br_startoff, 2005 (long long)mapp[i].br_startoff,
2007 (long long)mapp[i].br_startblock, 2006 (long long)mapp[i].br_startblock,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e60490bc00a6..be628677c288 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -270,9 +270,9 @@ xfs_swap_extents(
270 /* check inode formats now that data is flushed */ 270 /* check inode formats now that data is flushed */
271 error = xfs_swap_extents_check_format(ip, tip); 271 error = xfs_swap_extents_check_format(ip, tip);
272 if (error) { 272 if (error) {
273 xfs_fs_cmn_err(CE_NOTE, mp, 273 xfs_notice(mp,
274 "%s: inode 0x%llx format is incompatible for exchanging.", 274 "%s: inode 0x%llx format is incompatible for exchanging.",
275 __FILE__, ip->i_ino); 275 __func__, ip->i_ino);
276 goto out_unlock; 276 goto out_unlock;
277 } 277 }
278 278
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index a1321bc7f192..dba7a71cedf3 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -159,7 +159,7 @@ xfs_dir_ino_validate(
159 XFS_AGINO_TO_INO(mp, agno, agino) == ino; 159 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
160 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, 160 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
161 XFS_RANDOM_DIR_INO_VALIDATE))) { 161 XFS_RANDOM_DIR_INO_VALIDATE))) {
162 xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", 162 xfs_warn(mp, "Invalid inode number 0x%Lx",
163 (unsigned long long) ino); 163 (unsigned long long) ino);
164 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); 164 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
165 return XFS_ERROR(EFSCORRUPTED); 165 return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index f9a0864b696a..a0aab7d3294f 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -899,10 +899,9 @@ xfs_dir2_leafn_rebalance(
899 if(blk2->index < 0) { 899 if(blk2->index < 0) {
900 state->inleaf = 1; 900 state->inleaf = 1;
901 blk2->index = 0; 901 blk2->index = 0;
902 cmn_err(CE_ALERT, 902 xfs_alert(args->dp->i_mount,
903 "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " 903 "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n",
904 "blk1->index %d\n", 904 __func__, blk1->index);
905 blk1->index);
906 } 905 }
907} 906}
908 907
@@ -1641,26 +1640,22 @@ xfs_dir2_node_addname_int(
1641 } 1640 }
1642 1641
1643 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { 1642 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
1644 cmn_err(CE_ALERT, 1643 xfs_alert(mp,
1645 "xfs_dir2_node_addname_int: dir ino " 1644 "%s: dir ino " "%llu needed freesp block %lld for\n"
1646 "%llu needed freesp block %lld for\n" 1645 " data block %lld, got %lld ifbno %llu lastfbno %d",
1647 " data block %lld, got %lld\n" 1646 __func__, (unsigned long long)dp->i_ino,
1648 " ifbno %llu lastfbno %d\n",
1649 (unsigned long long)dp->i_ino,
1650 (long long)xfs_dir2_db_to_fdb(mp, dbno), 1647 (long long)xfs_dir2_db_to_fdb(mp, dbno),
1651 (long long)dbno, (long long)fbno, 1648 (long long)dbno, (long long)fbno,
1652 (unsigned long long)ifbno, lastfbno); 1649 (unsigned long long)ifbno, lastfbno);
1653 if (fblk) { 1650 if (fblk) {
1654 cmn_err(CE_ALERT, 1651 xfs_alert(mp,
1655 " fblk 0x%p blkno %llu " 1652 " fblk 0x%p blkno %llu index %d magic 0x%x",
1656 "index %d magic 0x%x\n",
1657 fblk, 1653 fblk,
1658 (unsigned long long)fblk->blkno, 1654 (unsigned long long)fblk->blkno,
1659 fblk->index, 1655 fblk->index,
1660 fblk->magic); 1656 fblk->magic);
1661 } else { 1657 } else {
1662 cmn_err(CE_ALERT, 1658 xfs_alert(mp, " ... fblk is NULL");
1663 " ... fblk is NULL\n");
1664 } 1659 }
1665 XFS_ERROR_REPORT("xfs_dir2_node_addname_int", 1660 XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
1666 XFS_ERRLEVEL_LOW, mp); 1661 XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 4c7db74a05f7..39f06336b99d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -48,7 +48,7 @@ xfs_error_trap(int e)
48 break; 48 break;
49 if (e != xfs_etrap[i]) 49 if (e != xfs_etrap[i])
50 continue; 50 continue;
51 cmn_err(CE_NOTE, "xfs_error_trap: error %d", e); 51 xfs_notice(NULL, "%s: error %d", __func__, e);
52 BUG(); 52 BUG();
53 break; 53 break;
54 } 54 }
@@ -74,7 +74,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
74 74
75 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { 75 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
76 if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { 76 if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
77 cmn_err(CE_WARN, 77 xfs_warn(NULL,
78 "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", 78 "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
79 expression, file, line, xfs_etest_fsname[i]); 79 expression, file, line, xfs_etest_fsname[i]);
80 return 1; 80 return 1;
@@ -95,14 +95,14 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
95 95
96 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { 96 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
97 if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { 97 if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
98 cmn_err(CE_WARN, "XFS error tag #%d on", error_tag); 98 xfs_warn(mp, "error tag #%d on", error_tag);
99 return 0; 99 return 0;
100 } 100 }
101 } 101 }
102 102
103 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { 103 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
104 if (xfs_etest[i] == 0) { 104 if (xfs_etest[i] == 0) {
105 cmn_err(CE_WARN, "Turned on XFS error tag #%d", 105 xfs_warn(mp, "Turned on XFS error tag #%d",
106 error_tag); 106 error_tag);
107 xfs_etest[i] = error_tag; 107 xfs_etest[i] = error_tag;
108 xfs_etest_fsid[i] = fsid; 108 xfs_etest_fsid[i] = fsid;
@@ -114,7 +114,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
114 } 114 }
115 } 115 }
116 116
117 cmn_err(CE_WARN, "error tag overflow, too many turned on"); 117 xfs_warn(mp, "error tag overflow, too many turned on");
118 118
119 return 1; 119 return 1;
120} 120}
@@ -133,7 +133,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
133 if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && 133 if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) &&
134 xfs_etest[i] != 0) { 134 xfs_etest[i] != 0) {
135 cleared = 1; 135 cleared = 1;
136 cmn_err(CE_WARN, "Clearing XFS error tag #%d", 136 xfs_warn(mp, "Clearing XFS error tag #%d",
137 xfs_etest[i]); 137 xfs_etest[i]);
138 xfs_etest[i] = 0; 138 xfs_etest[i] = 0;
139 xfs_etest_fsid[i] = 0LL; 139 xfs_etest_fsid[i] = 0LL;
@@ -144,9 +144,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
144 } 144 }
145 145
146 if (loud || cleared) 146 if (loud || cleared)
147 cmn_err(CE_WARN, 147 xfs_warn(mp, "Cleared all XFS error tags for filesystem");
148 "Cleared all XFS error tags for filesystem \"%s\"",
149 mp->m_fsname);
150 148
151 return 0; 149 return 0;
152} 150}
@@ -162,9 +160,8 @@ xfs_error_report(
162 inst_t *ra) 160 inst_t *ra)
163{ 161{
164 if (level <= xfs_error_level) { 162 if (level <= xfs_error_level) {
165 xfs_cmn_err(XFS_PTAG_ERROR_REPORT, 163 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
166 CE_ALERT, mp, 164 "Internal error %s at line %d of file %s. Caller 0x%p\n",
167 "XFS internal error %s at line %d of file %s. Caller 0x%p\n",
168 tag, linenum, filename, ra); 165 tag, linenum, filename, ra);
169 166
170 xfs_stack_trace(); 167 xfs_stack_trace();
@@ -184,4 +181,5 @@ xfs_corruption_error(
184 if (level <= xfs_error_level) 181 if (level <= xfs_error_level)
185 xfs_hex_dump(p, 16); 182 xfs_hex_dump(p, 16);
186 xfs_error_report(tag, level, mp, filename, linenum, ra); 183 xfs_error_report(tag, level, mp, filename, linenum, ra);
184 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
187} 185}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 10dce5475f02..079a367f44ee 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -145,10 +145,8 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
145#endif /* DEBUG */ 145#endif /* DEBUG */
146 146
147/* 147/*
148 * XFS panic tags -- allow a call to xfs_cmn_err() be turned into 148 * XFS panic tags -- allow a call to xfs_alert_tag() be turned into
149 * a panic by setting xfs_panic_mask in a 149 * a panic by setting xfs_panic_mask in a sysctl.
150 * sysctl. update xfs_max[XFS_PARAM] if
151 * more are added.
152 */ 150 */
153#define XFS_NO_PTAG 0 151#define XFS_NO_PTAG 0
154#define XFS_PTAG_IFLUSH 0x00000001 152#define XFS_PTAG_IFLUSH 0x00000001
@@ -160,17 +158,4 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
160#define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 158#define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040
161#define XFS_PTAG_FSBLOCK_ZERO 0x00000080 159#define XFS_PTAG_FSBLOCK_ZERO 0x00000080
162 160
163struct xfs_mount;
164
165extern void xfs_hex_dump(void *p, int length);
166
167#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
168 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
169
170#define xfs_fs_mount_cmn_err(f, fmt, args...) \
171 do { \
172 if (!(f & XFS_MFSI_QUIET)) \
173 cmn_err(CE_WARN, "XFS: " fmt, ## args); \
174 } while (0)
175
176#endif /* __XFS_ERROR_H__ */ 161#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 85668efb3e3e..9153d2c77caf 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -385,8 +385,8 @@ xfs_growfs_data_private(
385 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 385 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
386 XFS_FSS_TO_BB(mp, 1), 0, &bp); 386 XFS_FSS_TO_BB(mp, 1), 0, &bp);
387 if (error) { 387 if (error) {
388 xfs_fs_cmn_err(CE_WARN, mp, 388 xfs_warn(mp,
389 "error %d reading secondary superblock for ag %d", 389 "error %d reading secondary superblock for ag %d",
390 error, agno); 390 error, agno);
391 break; 391 break;
392 } 392 }
@@ -399,7 +399,7 @@ xfs_growfs_data_private(
399 if (!(error = xfs_bwrite(mp, bp))) { 399 if (!(error = xfs_bwrite(mp, bp))) {
400 continue; 400 continue;
401 } else { 401 } else {
402 xfs_fs_cmn_err(CE_WARN, mp, 402 xfs_warn(mp,
403 "write error %d updating secondary superblock for ag %d", 403 "write error %d updating secondary superblock for ag %d",
404 error, agno); 404 error, agno);
405 break; /* no point in continuing */ 405 break; /* no point in continuing */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0626a32c3447..84ebeec16642 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1055,28 +1055,23 @@ xfs_difree(
1055 */ 1055 */
1056 agno = XFS_INO_TO_AGNO(mp, inode); 1056 agno = XFS_INO_TO_AGNO(mp, inode);
1057 if (agno >= mp->m_sb.sb_agcount) { 1057 if (agno >= mp->m_sb.sb_agcount) {
1058 cmn_err(CE_WARN, 1058 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1059 "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", 1059 __func__, agno, mp->m_sb.sb_agcount);
1060 agno, mp->m_sb.sb_agcount, mp->m_fsname);
1061 ASSERT(0); 1060 ASSERT(0);
1062 return XFS_ERROR(EINVAL); 1061 return XFS_ERROR(EINVAL);
1063 } 1062 }
1064 agino = XFS_INO_TO_AGINO(mp, inode); 1063 agino = XFS_INO_TO_AGINO(mp, inode);
1065 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 1064 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1066 cmn_err(CE_WARN, 1065 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1067 "xfs_difree: inode != XFS_AGINO_TO_INO() " 1066 __func__, (unsigned long long)inode,
1068 "(%llu != %llu) on %s. Returning EINVAL.", 1067 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1069 (unsigned long long)inode,
1070 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
1071 mp->m_fsname);
1072 ASSERT(0); 1068 ASSERT(0);
1073 return XFS_ERROR(EINVAL); 1069 return XFS_ERROR(EINVAL);
1074 } 1070 }
1075 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1071 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1076 if (agbno >= mp->m_sb.sb_agblocks) { 1072 if (agbno >= mp->m_sb.sb_agblocks) {
1077 cmn_err(CE_WARN, 1073 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1078 "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", 1074 __func__, agbno, mp->m_sb.sb_agblocks);
1079 agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
1080 ASSERT(0); 1075 ASSERT(0);
1081 return XFS_ERROR(EINVAL); 1076 return XFS_ERROR(EINVAL);
1082 } 1077 }
@@ -1085,9 +1080,8 @@ xfs_difree(
1085 */ 1080 */
1086 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1081 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1087 if (error) { 1082 if (error) {
1088 cmn_err(CE_WARN, 1083 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1089 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 1084 __func__, error);
1090 error, mp->m_fsname);
1091 return error; 1085 return error;
1092 } 1086 }
1093 agi = XFS_BUF_TO_AGI(agbp); 1087 agi = XFS_BUF_TO_AGI(agbp);
@@ -1106,17 +1100,15 @@ xfs_difree(
1106 * Look for the entry describing this inode. 1100 * Look for the entry describing this inode.
1107 */ 1101 */
1108 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { 1102 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
1109 cmn_err(CE_WARN, 1103 xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
1110 "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", 1104 __func__, error);
1111 error, mp->m_fsname);
1112 goto error0; 1105 goto error0;
1113 } 1106 }
1114 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1107 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1115 error = xfs_inobt_get_rec(cur, &rec, &i); 1108 error = xfs_inobt_get_rec(cur, &rec, &i);
1116 if (error) { 1109 if (error) {
1117 cmn_err(CE_WARN, 1110 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
1118 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", 1111 __func__, error);
1119 error, mp->m_fsname);
1120 goto error0; 1112 goto error0;
1121 } 1113 }
1122 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1114 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
@@ -1157,8 +1149,8 @@ xfs_difree(
1157 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1149 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1158 1150
1159 if ((error = xfs_btree_delete(cur, &i))) { 1151 if ((error = xfs_btree_delete(cur, &i))) {
1160 cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", 1152 xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
1161 error, mp->m_fsname); 1153 __func__, error);
1162 goto error0; 1154 goto error0;
1163 } 1155 }
1164 1156
@@ -1170,9 +1162,8 @@ xfs_difree(
1170 1162
1171 error = xfs_inobt_update(cur, &rec); 1163 error = xfs_inobt_update(cur, &rec);
1172 if (error) { 1164 if (error) {
1173 cmn_err(CE_WARN, 1165 xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
1174 "xfs_difree: xfs_inobt_update returned an error %d on %s.", 1166 __func__, error);
1175 error, mp->m_fsname);
1176 goto error0; 1167 goto error0;
1177 } 1168 }
1178 1169
@@ -1218,10 +1209,9 @@ xfs_imap_lookup(
1218 1209
1219 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1210 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1220 if (error) { 1211 if (error) {
1221 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1212 xfs_alert(mp,
1222 "xfs_ialloc_read_agi() returned " 1213 "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
1223 "error %d, agno %d", 1214 __func__, error, agno);
1224 error, agno);
1225 return error; 1215 return error;
1226 } 1216 }
1227 1217
@@ -1299,24 +1289,21 @@ xfs_imap(
1299 if (flags & XFS_IGET_UNTRUSTED) 1289 if (flags & XFS_IGET_UNTRUSTED)
1300 return XFS_ERROR(EINVAL); 1290 return XFS_ERROR(EINVAL);
1301 if (agno >= mp->m_sb.sb_agcount) { 1291 if (agno >= mp->m_sb.sb_agcount) {
1302 xfs_fs_cmn_err(CE_ALERT, mp, 1292 xfs_alert(mp,
1303 "xfs_imap: agno (%d) >= " 1293 "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
1304 "mp->m_sb.sb_agcount (%d)", 1294 __func__, agno, mp->m_sb.sb_agcount);
1305 agno, mp->m_sb.sb_agcount);
1306 } 1295 }
1307 if (agbno >= mp->m_sb.sb_agblocks) { 1296 if (agbno >= mp->m_sb.sb_agblocks) {
1308 xfs_fs_cmn_err(CE_ALERT, mp, 1297 xfs_alert(mp,
1309 "xfs_imap: agbno (0x%llx) >= " 1298 "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
1310 "mp->m_sb.sb_agblocks (0x%lx)", 1299 __func__, (unsigned long long)agbno,
1311 (unsigned long long) agbno, 1300 (unsigned long)mp->m_sb.sb_agblocks);
1312 (unsigned long) mp->m_sb.sb_agblocks);
1313 } 1301 }
1314 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1302 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1315 xfs_fs_cmn_err(CE_ALERT, mp, 1303 xfs_alert(mp,
1316 "xfs_imap: ino (0x%llx) != " 1304 "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
1317 "XFS_AGINO_TO_INO(mp, agno, agino) " 1305 __func__, ino,
1318 "(0x%llx)", 1306 XFS_AGINO_TO_INO(mp, agno, agino));
1319 ino, XFS_AGINO_TO_INO(mp, agno, agino));
1320 } 1307 }
1321 xfs_stack_trace(); 1308 xfs_stack_trace();
1322#endif /* DEBUG */ 1309#endif /* DEBUG */
@@ -1388,10 +1375,9 @@ out_map:
1388 */ 1375 */
1389 if ((imap->im_blkno + imap->im_len) > 1376 if ((imap->im_blkno + imap->im_len) >
1390 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 1377 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
1391 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1378 xfs_alert(mp,
1392 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " 1379 "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
1393 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", 1380 __func__, (unsigned long long) imap->im_blkno,
1394 (unsigned long long) imap->im_blkno,
1395 (unsigned long long) imap->im_len, 1381 (unsigned long long) imap->im_len,
1396 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1382 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1397 return XFS_ERROR(EINVAL); 1383 return XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index be7cf625421f..742c8330994a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -110,8 +110,8 @@ xfs_inobp_check(
110 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 110 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
111 i * mp->m_sb.sb_inodesize); 111 i * mp->m_sb.sb_inodesize);
112 if (!dip->di_next_unlinked) { 112 if (!dip->di_next_unlinked) {
113 xfs_fs_cmn_err(CE_ALERT, mp, 113 xfs_alert(mp,
114 "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", 114 "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
115 bp); 115 bp);
116 ASSERT(dip->di_next_unlinked); 116 ASSERT(dip->di_next_unlinked);
117 } 117 }
@@ -142,10 +142,9 @@ xfs_imap_to_bp(
142 (int)imap->im_len, buf_flags, &bp); 142 (int)imap->im_len, buf_flags, &bp);
143 if (error) { 143 if (error) {
144 if (error != EAGAIN) { 144 if (error != EAGAIN) {
145 cmn_err(CE_WARN, 145 xfs_warn(mp,
146 "xfs_imap_to_bp: xfs_trans_read_buf()returned " 146 "%s: xfs_trans_read_buf() returned error %d.",
147 "an error %d on %s. Returning error.", 147 __func__, error);
148 error, mp->m_fsname);
149 } else { 148 } else {
150 ASSERT(buf_flags & XBF_TRYLOCK); 149 ASSERT(buf_flags & XBF_TRYLOCK);
151 } 150 }
@@ -180,12 +179,11 @@ xfs_imap_to_bp(
180 XFS_CORRUPTION_ERROR("xfs_imap_to_bp", 179 XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
181 XFS_ERRLEVEL_HIGH, mp, dip); 180 XFS_ERRLEVEL_HIGH, mp, dip);
182#ifdef DEBUG 181#ifdef DEBUG
183 cmn_err(CE_PANIC, 182 xfs_emerg(mp,
184 "Device %s - bad inode magic/vsn " 183 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
185 "daddr %lld #%d (magic=%x)",
186 XFS_BUFTARG_NAME(mp->m_ddev_targp),
187 (unsigned long long)imap->im_blkno, i, 184 (unsigned long long)imap->im_blkno, i,
188 be16_to_cpu(dip->di_magic)); 185 be16_to_cpu(dip->di_magic));
186 ASSERT(0);
189#endif 187#endif
190 xfs_trans_brelse(tp, bp); 188 xfs_trans_brelse(tp, bp);
191 return XFS_ERROR(EFSCORRUPTED); 189 return XFS_ERROR(EFSCORRUPTED);
@@ -317,7 +315,7 @@ xfs_iformat(
317 if (unlikely(be32_to_cpu(dip->di_nextents) + 315 if (unlikely(be32_to_cpu(dip->di_nextents) +
318 be16_to_cpu(dip->di_anextents) > 316 be16_to_cpu(dip->di_anextents) >
319 be64_to_cpu(dip->di_nblocks))) { 317 be64_to_cpu(dip->di_nblocks))) {
320 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 318 xfs_warn(ip->i_mount,
321 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 319 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
322 (unsigned long long)ip->i_ino, 320 (unsigned long long)ip->i_ino,
323 (int)(be32_to_cpu(dip->di_nextents) + 321 (int)(be32_to_cpu(dip->di_nextents) +
@@ -330,8 +328,7 @@ xfs_iformat(
330 } 328 }
331 329
332 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 330 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
333 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 331 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
334 "corrupt dinode %Lu, forkoff = 0x%x.",
335 (unsigned long long)ip->i_ino, 332 (unsigned long long)ip->i_ino,
336 dip->di_forkoff); 333 dip->di_forkoff);
337 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 334 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -341,7 +338,7 @@ xfs_iformat(
341 338
342 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 339 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
343 !ip->i_mount->m_rtdev_targp)) { 340 !ip->i_mount->m_rtdev_targp)) {
344 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 341 xfs_warn(ip->i_mount,
345 "corrupt dinode %Lu, has realtime flag set.", 342 "corrupt dinode %Lu, has realtime flag set.",
346 ip->i_ino); 343 ip->i_ino);
347 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 344 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
@@ -373,9 +370,8 @@ xfs_iformat(
373 * no local regular files yet 370 * no local regular files yet
374 */ 371 */
375 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { 372 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
376 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 373 xfs_warn(ip->i_mount,
377 "corrupt inode %Lu " 374 "corrupt inode %Lu (local format for regular file).",
378 "(local format for regular file).",
379 (unsigned long long) ip->i_ino); 375 (unsigned long long) ip->i_ino);
380 XFS_CORRUPTION_ERROR("xfs_iformat(4)", 376 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
381 XFS_ERRLEVEL_LOW, 377 XFS_ERRLEVEL_LOW,
@@ -385,9 +381,8 @@ xfs_iformat(
385 381
386 di_size = be64_to_cpu(dip->di_size); 382 di_size = be64_to_cpu(dip->di_size);
387 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 383 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
388 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 384 xfs_warn(ip->i_mount,
389 "corrupt inode %Lu " 385 "corrupt inode %Lu (bad size %Ld for local inode).",
390 "(bad size %Ld for local inode).",
391 (unsigned long long) ip->i_ino, 386 (unsigned long long) ip->i_ino,
392 (long long) di_size); 387 (long long) di_size);
393 XFS_CORRUPTION_ERROR("xfs_iformat(5)", 388 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -431,9 +426,8 @@ xfs_iformat(
431 size = be16_to_cpu(atp->hdr.totsize); 426 size = be16_to_cpu(atp->hdr.totsize);
432 427
433 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { 428 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
434 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 429 xfs_warn(ip->i_mount,
435 "corrupt inode %Lu " 430 "corrupt inode %Lu (bad attr fork size %Ld).",
436 "(bad attr fork size %Ld).",
437 (unsigned long long) ip->i_ino, 431 (unsigned long long) ip->i_ino,
438 (long long) size); 432 (long long) size);
439 XFS_CORRUPTION_ERROR("xfs_iformat(8)", 433 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
@@ -488,9 +482,8 @@ xfs_iformat_local(
488 * kmem_alloc() or memcpy() below. 482 * kmem_alloc() or memcpy() below.
489 */ 483 */
490 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 484 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
491 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 485 xfs_warn(ip->i_mount,
492 "corrupt inode %Lu " 486 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
493 "(bad size %d for local fork, size = %d).",
494 (unsigned long long) ip->i_ino, size, 487 (unsigned long long) ip->i_ino, size,
495 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 488 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
496 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 489 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -547,8 +540,7 @@ xfs_iformat_extents(
547 * kmem_alloc() or memcpy() below. 540 * kmem_alloc() or memcpy() below.
548 */ 541 */
549 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 542 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
550 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 543 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
551 "corrupt inode %Lu ((a)extents = %d).",
552 (unsigned long long) ip->i_ino, nex); 544 (unsigned long long) ip->i_ino, nex);
553 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 545 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
554 ip->i_mount, dip); 546 ip->i_mount, dip);
@@ -623,11 +615,10 @@ xfs_iformat_btree(
623 || XFS_BMDR_SPACE_CALC(nrecs) > 615 || XFS_BMDR_SPACE_CALC(nrecs) >
624 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 616 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
625 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 617 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
626 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 618 xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
627 "corrupt inode %Lu (btree).",
628 (unsigned long long) ip->i_ino); 619 (unsigned long long) ip->i_ino);
629 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 620 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
630 ip->i_mount); 621 ip->i_mount, dip);
631 return XFS_ERROR(EFSCORRUPTED); 622 return XFS_ERROR(EFSCORRUPTED);
632 } 623 }
633 624
@@ -813,11 +804,9 @@ xfs_iread(
813 */ 804 */
814 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { 805 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
815#ifdef DEBUG 806#ifdef DEBUG
816 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 807 xfs_alert(mp,
817 "dip->di_magic (0x%x) != " 808 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
818 "XFS_DINODE_MAGIC (0x%x)", 809 __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
819 be16_to_cpu(dip->di_magic),
820 XFS_DINODE_MAGIC);
821#endif /* DEBUG */ 810#endif /* DEBUG */
822 error = XFS_ERROR(EINVAL); 811 error = XFS_ERROR(EINVAL);
823 goto out_brelse; 812 goto out_brelse;
@@ -835,9 +824,8 @@ xfs_iread(
835 error = xfs_iformat(ip, dip); 824 error = xfs_iformat(ip, dip);
836 if (error) { 825 if (error) {
837#ifdef DEBUG 826#ifdef DEBUG
838 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 827 xfs_alert(mp, "%s: xfs_iformat() returned error %d",
839 "xfs_iformat() returned error %d", 828 __func__, error);
840 error);
841#endif /* DEBUG */ 829#endif /* DEBUG */
842 goto out_brelse; 830 goto out_brelse;
843 } 831 }
@@ -1016,8 +1004,8 @@ xfs_ialloc(
1016 * This is because we're setting fields here we need 1004 * This is because we're setting fields here we need
1017 * to prevent others from looking at until we're done. 1005 * to prevent others from looking at until we're done.
1018 */ 1006 */
1019 error = xfs_trans_iget(tp->t_mountp, tp, ino, 1007 error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
1020 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1008 XFS_ILOCK_EXCL, &ip);
1021 if (error) 1009 if (error)
1022 return error; 1010 return error;
1023 ASSERT(ip != NULL); 1011 ASSERT(ip != NULL);
@@ -1166,6 +1154,7 @@ xfs_ialloc(
1166 /* 1154 /*
1167 * Log the new values stuffed into the inode. 1155 * Log the new values stuffed into the inode.
1168 */ 1156 */
1157 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
1169 xfs_trans_log_inode(tp, ip, flags); 1158 xfs_trans_log_inode(tp, ip, flags);
1170 1159
1171 /* now that we have an i_mode we can setup inode ops and unlock */ 1160 /* now that we have an i_mode we can setup inode ops and unlock */
@@ -1820,9 +1809,8 @@ xfs_iunlink_remove(
1820 */ 1809 */
1821 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 1810 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1822 if (error) { 1811 if (error) {
1823 cmn_err(CE_WARN, 1812 xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
1824 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1813 __func__, error);
1825 error, mp->m_fsname);
1826 return error; 1814 return error;
1827 } 1815 }
1828 next_agino = be32_to_cpu(dip->di_next_unlinked); 1816 next_agino = be32_to_cpu(dip->di_next_unlinked);
@@ -1867,9 +1855,9 @@ xfs_iunlink_remove(
1867 error = xfs_inotobp(mp, tp, next_ino, &last_dip, 1855 error = xfs_inotobp(mp, tp, next_ino, &last_dip,
1868 &last_ibp, &last_offset, 0); 1856 &last_ibp, &last_offset, 0);
1869 if (error) { 1857 if (error) {
1870 cmn_err(CE_WARN, 1858 xfs_warn(mp,
1871 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", 1859 "%s: xfs_inotobp() returned error %d.",
1872 error, mp->m_fsname); 1860 __func__, error);
1873 return error; 1861 return error;
1874 } 1862 }
1875 next_agino = be32_to_cpu(last_dip->di_next_unlinked); 1863 next_agino = be32_to_cpu(last_dip->di_next_unlinked);
@@ -1882,9 +1870,8 @@ xfs_iunlink_remove(
1882 */ 1870 */
1883 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 1871 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1884 if (error) { 1872 if (error) {
1885 cmn_err(CE_WARN, 1873 xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
1886 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1874 __func__, error);
1887 error, mp->m_fsname);
1888 return error; 1875 return error;
1889 } 1876 }
1890 next_agino = be32_to_cpu(dip->di_next_unlinked); 1877 next_agino = be32_to_cpu(dip->di_next_unlinked);
@@ -2848,7 +2835,7 @@ xfs_iflush(
2848 * Get the buffer containing the on-disk inode. 2835 * Get the buffer containing the on-disk inode.
2849 */ 2836 */
2850 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 2837 error = xfs_itobp(mp, NULL, ip, &dip, &bp,
2851 (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); 2838 (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
2852 if (error || !bp) { 2839 if (error || !bp) {
2853 xfs_ifunlock(ip); 2840 xfs_ifunlock(ip);
2854 return error; 2841 return error;
@@ -2939,16 +2926,16 @@ xfs_iflush_int(
2939 2926
2940 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, 2927 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
2941 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2928 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2942 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2929 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2943 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", 2930 "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
2944 ip->i_ino, be16_to_cpu(dip->di_magic), dip); 2931 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
2945 goto corrupt_out; 2932 goto corrupt_out;
2946 } 2933 }
2947 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 2934 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
2948 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 2935 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
2949 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2936 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2950 "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 2937 "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
2951 ip->i_ino, ip, ip->i_d.di_magic); 2938 __func__, ip->i_ino, ip, ip->i_d.di_magic);
2952 goto corrupt_out; 2939 goto corrupt_out;
2953 } 2940 }
2954 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 2941 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
@@ -2956,9 +2943,9 @@ xfs_iflush_int(
2956 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 2943 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
2957 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 2944 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
2958 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 2945 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
2959 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2946 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2960 "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", 2947 "%s: Bad regular inode %Lu, ptr 0x%p",
2961 ip->i_ino, ip); 2948 __func__, ip->i_ino, ip);
2962 goto corrupt_out; 2949 goto corrupt_out;
2963 } 2950 }
2964 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 2951 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
@@ -2967,28 +2954,28 @@ xfs_iflush_int(
2967 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 2954 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
2968 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 2955 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
2969 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 2956 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
2970 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2957 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2971 "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", 2958 "%s: Bad directory inode %Lu, ptr 0x%p",
2972 ip->i_ino, ip); 2959 __func__, ip->i_ino, ip);
2973 goto corrupt_out; 2960 goto corrupt_out;
2974 } 2961 }
2975 } 2962 }
2976 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 2963 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
2977 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 2964 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
2978 XFS_RANDOM_IFLUSH_5)) { 2965 XFS_RANDOM_IFLUSH_5)) {
2979 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2966 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2980 "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", 2967 "%s: detected corrupt incore inode %Lu, "
2981 ip->i_ino, 2968 "total extents = %d, nblocks = %Ld, ptr 0x%p",
2969 __func__, ip->i_ino,
2982 ip->i_d.di_nextents + ip->i_d.di_anextents, 2970 ip->i_d.di_nextents + ip->i_d.di_anextents,
2983 ip->i_d.di_nblocks, 2971 ip->i_d.di_nblocks, ip);
2984 ip);
2985 goto corrupt_out; 2972 goto corrupt_out;
2986 } 2973 }
2987 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 2974 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
2988 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 2975 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
2989 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2976 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2990 "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 2977 "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
2991 ip->i_ino, ip->i_d.di_forkoff, ip); 2978 __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
2992 goto corrupt_out; 2979 goto corrupt_out;
2993 } 2980 }
2994 /* 2981 /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 5c95fa8ec11d..f753200cef8d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -409,28 +409,35 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
409/* 409/*
410 * Flags for lockdep annotations. 410 * Flags for lockdep annotations.
411 * 411 *
412 * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes 412 * XFS_LOCK_PARENT - for directory operations that require locking a
413 * (ie directory operations that require locking a directory inode and 413 * parent directory inode and a child entry inode. The parent gets locked
414 * an entry inode). The first inode gets locked with this flag so it 414 * with this flag so it gets a lockdep subclass of 1 and the child entry
415 * gets a lockdep subclass of 1 and the second lock will have a lockdep 415 * lock will have a lockdep subclass of 0.
416 * subclass of 0. 416 *
417 * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
418 * inodes do not participate in the normal lock order, and thus have their
419 * own subclasses.
417 * 420 *
418 * XFS_LOCK_INUMORDER - for locking several inodes at the some time 421 * XFS_LOCK_INUMORDER - for locking several inodes at the some time
419 * with xfs_lock_inodes(). This flag is used as the starting subclass 422 * with xfs_lock_inodes(). This flag is used as the starting subclass
420 * and each subsequent lock acquired will increment the subclass by one. 423 * and each subsequent lock acquired will increment the subclass by one.
421 * So the first lock acquired will have a lockdep subclass of 2, the 424 * So the first lock acquired will have a lockdep subclass of 4, the
422 * second lock will have a lockdep subclass of 3, and so on. It is 425 * second lock will have a lockdep subclass of 5, and so on. It is
423 * the responsibility of the class builder to shift this to the correct 426 * the responsibility of the class builder to shift this to the correct
424 * portion of the lock_mode lockdep mask. 427 * portion of the lock_mode lockdep mask.
425 */ 428 */
426#define XFS_LOCK_PARENT 1 429#define XFS_LOCK_PARENT 1
427#define XFS_LOCK_INUMORDER 2 430#define XFS_LOCK_RTBITMAP 2
431#define XFS_LOCK_RTSUM 3
432#define XFS_LOCK_INUMORDER 4
428 433
429#define XFS_IOLOCK_SHIFT 16 434#define XFS_IOLOCK_SHIFT 16
430#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) 435#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
431 436
432#define XFS_ILOCK_SHIFT 24 437#define XFS_ILOCK_SHIFT 24
433#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) 438#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
439#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
440#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
434 441
435#define XFS_IOLOCK_DEP_MASK 0x00ff0000 442#define XFS_IOLOCK_DEP_MASK 0x00ff0000
436#define XFS_ILOCK_DEP_MASK 0xff000000 443#define XFS_ILOCK_DEP_MASK 0xff000000
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index fd4f398bd6f1..46cc40131d4a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -760,11 +760,11 @@ xfs_inode_item_push(
760 * Push the inode to it's backing buffer. This will not remove the 760 * Push the inode to it's backing buffer. This will not remove the
761 * inode from the AIL - a further push will be required to trigger a 761 * inode from the AIL - a further push will be required to trigger a
762 * buffer push. However, this allows all the dirty inodes to be pushed 762 * buffer push. However, this allows all the dirty inodes to be pushed
763 * to the buffer before it is pushed to disk. THe buffer IO completion 763 * to the buffer before it is pushed to disk. The buffer IO completion
764 * will pull th einode from the AIL, mark it clean and unlock the flush 764 * will pull the inode from the AIL, mark it clean and unlock the flush
765 * lock. 765 * lock.
766 */ 766 */
767 (void) xfs_iflush(ip, 0); 767 (void) xfs_iflush(ip, SYNC_TRYLOCK);
768 xfs_iunlock(ip, XFS_ILOCK_SHARED); 768 xfs_iunlock(ip, XFS_ILOCK_SHARED);
769} 769}
770 770
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 8a0f044750c3..091d82b94c4d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -101,11 +101,11 @@ xfs_iomap_eof_align_last_fsb(
101} 101}
102 102
103STATIC int 103STATIC int
104xfs_cmn_err_fsblock_zero( 104xfs_alert_fsblock_zero(
105 xfs_inode_t *ip, 105 xfs_inode_t *ip,
106 xfs_bmbt_irec_t *imap) 106 xfs_bmbt_irec_t *imap)
107{ 107{
108 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, 108 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
109 "Access to block zero in inode %llu " 109 "Access to block zero in inode %llu "
110 "start_block: %llx start_off: %llx " 110 "start_block: %llx start_off: %llx "
111 "blkcnt: %llx extent-state: %x\n", 111 "blkcnt: %llx extent-state: %x\n",
@@ -246,7 +246,7 @@ xfs_iomap_write_direct(
246 } 246 }
247 247
248 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { 248 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) {
249 error = xfs_cmn_err_fsblock_zero(ip, imap); 249 error = xfs_alert_fsblock_zero(ip, imap);
250 goto error_out; 250 goto error_out;
251 } 251 }
252 252
@@ -464,7 +464,7 @@ retry:
464 } 464 }
465 465
466 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) 466 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
467 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 467 return xfs_alert_fsblock_zero(ip, &imap[0]);
468 468
469 *ret_imap = imap[0]; 469 *ret_imap = imap[0];
470 return 0; 470 return 0;
@@ -614,7 +614,7 @@ xfs_iomap_write_allocate(
614 * covers at least part of the callers request 614 * covers at least part of the callers request
615 */ 615 */
616 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) 616 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
617 return xfs_cmn_err_fsblock_zero(ip, imap); 617 return xfs_alert_fsblock_zero(ip, imap);
618 618
619 if ((offset_fsb >= imap->br_startoff) && 619 if ((offset_fsb >= imap->br_startoff) &&
620 (offset_fsb < (imap->br_startoff + 620 (offset_fsb < (imap->br_startoff +
@@ -724,7 +724,7 @@ xfs_iomap_write_unwritten(
724 return XFS_ERROR(error); 724 return XFS_ERROR(error);
725 725
726 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) 726 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
727 return xfs_cmn_err_fsblock_zero(ip, &imap); 727 return xfs_alert_fsblock_zero(ip, &imap);
728 728
729 if ((numblks_fsb = imap.br_blockcount) == 0) { 729 if ((numblks_fsb = imap.br_blockcount) == 0) {
730 /* 730 /*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ae6fef1ff563..25efa9b8a602 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -374,11 +374,10 @@ xfs_log_mount(
374 int error; 374 int error;
375 375
376 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 376 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
377 cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); 377 xfs_notice(mp, "Mounting Filesystem");
378 else { 378 else {
379 cmn_err(CE_NOTE, 379 xfs_notice(mp,
380 "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", 380"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent.");
381 mp->m_fsname);
382 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 381 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
383 } 382 }
384 383
@@ -393,7 +392,7 @@ xfs_log_mount(
393 */ 392 */
394 error = xfs_trans_ail_init(mp); 393 error = xfs_trans_ail_init(mp);
395 if (error) { 394 if (error) {
396 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); 395 xfs_warn(mp, "AIL initialisation failed: error %d", error);
397 goto out_free_log; 396 goto out_free_log;
398 } 397 }
399 mp->m_log->l_ailp = mp->m_ail; 398 mp->m_log->l_ailp = mp->m_ail;
@@ -413,7 +412,8 @@ xfs_log_mount(
413 if (readonly) 412 if (readonly)
414 mp->m_flags |= XFS_MOUNT_RDONLY; 413 mp->m_flags |= XFS_MOUNT_RDONLY;
415 if (error) { 414 if (error) {
416 cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); 415 xfs_warn(mp, "log mount/recovery failed: error %d",
416 error);
417 goto out_destroy_ail; 417 goto out_destroy_ail;
418 } 418 }
419 } 419 }
@@ -542,10 +542,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
542 */ 542 */
543 } 543 }
544 544
545 if (error) { 545 if (error)
546 xfs_fs_cmn_err(CE_ALERT, mp, 546 xfs_alert(mp, "%s: unmount record failed", __func__);
547 "xfs_log_unmount: unmount record failed");
548 }
549 547
550 548
551 spin_lock(&log->l_icloglock); 549 spin_lock(&log->l_icloglock);
@@ -852,7 +850,7 @@ xlog_space_left(
852 * In this case we just want to return the size of the 850 * In this case we just want to return the size of the
853 * log as the amount of space left. 851 * log as the amount of space left.
854 */ 852 */
855 xfs_fs_cmn_err(CE_ALERT, log->l_mp, 853 xfs_alert(log->l_mp,
856 "xlog_space_left: head behind tail\n" 854 "xlog_space_left: head behind tail\n"
857 " tail_cycle = %d, tail_bytes = %d\n" 855 " tail_cycle = %d, tail_bytes = %d\n"
858 " GH cycle = %d, GH bytes = %d", 856 " GH cycle = %d, GH bytes = %d",
@@ -1001,7 +999,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1001 999
1002 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); 1000 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
1003 if (!log) { 1001 if (!log) {
1004 xlog_warn("XFS: Log allocation failed: No memory!"); 1002 xfs_warn(mp, "Log allocation failed: No memory!");
1005 goto out; 1003 goto out;
1006 } 1004 }
1007 1005
@@ -1029,24 +1027,24 @@ xlog_alloc_log(xfs_mount_t *mp,
1029 if (xfs_sb_version_hassector(&mp->m_sb)) { 1027 if (xfs_sb_version_hassector(&mp->m_sb)) {
1030 log2_size = mp->m_sb.sb_logsectlog; 1028 log2_size = mp->m_sb.sb_logsectlog;
1031 if (log2_size < BBSHIFT) { 1029 if (log2_size < BBSHIFT) {
1032 xlog_warn("XFS: Log sector size too small " 1030 xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
1033 "(0x%x < 0x%x)", log2_size, BBSHIFT); 1031 log2_size, BBSHIFT);
1034 goto out_free_log; 1032 goto out_free_log;
1035 } 1033 }
1036 1034
1037 log2_size -= BBSHIFT; 1035 log2_size -= BBSHIFT;
1038 if (log2_size > mp->m_sectbb_log) { 1036 if (log2_size > mp->m_sectbb_log) {
1039 xlog_warn("XFS: Log sector size too large " 1037 xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)",
1040 "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log); 1038 log2_size, mp->m_sectbb_log);
1041 goto out_free_log; 1039 goto out_free_log;
1042 } 1040 }
1043 1041
1044 /* for larger sector sizes, must have v2 or external log */ 1042 /* for larger sector sizes, must have v2 or external log */
1045 if (log2_size && log->l_logBBstart > 0 && 1043 if (log2_size && log->l_logBBstart > 0 &&
1046 !xfs_sb_version_haslogv2(&mp->m_sb)) { 1044 !xfs_sb_version_haslogv2(&mp->m_sb)) {
1047 1045 xfs_warn(mp,
1048 xlog_warn("XFS: log sector size (0x%x) invalid " 1046 "log sector size (0x%x) invalid for configuration.",
1049 "for configuration.", log2_size); 1047 log2_size);
1050 goto out_free_log; 1048 goto out_free_log;
1051 } 1049 }
1052 } 1050 }
@@ -1563,38 +1561,36 @@ xlog_print_tic_res(
1563 "SWAPEXT" 1561 "SWAPEXT"
1564 }; 1562 };
1565 1563
1566 xfs_fs_cmn_err(CE_WARN, mp, 1564 xfs_warn(mp,
1567 "xfs_log_write: reservation summary:\n" 1565 "xfs_log_write: reservation summary:\n"
1568 " trans type = %s (%u)\n" 1566 " trans type = %s (%u)\n"
1569 " unit res = %d bytes\n" 1567 " unit res = %d bytes\n"
1570 " current res = %d bytes\n" 1568 " current res = %d bytes\n"
1571 " total reg = %u bytes (o/flow = %u bytes)\n" 1569 " total reg = %u bytes (o/flow = %u bytes)\n"
1572 " ophdrs = %u (ophdr space = %u bytes)\n" 1570 " ophdrs = %u (ophdr space = %u bytes)\n"
1573 " ophdr + reg = %u bytes\n" 1571 " ophdr + reg = %u bytes\n"
1574 " num regions = %u\n", 1572 " num regions = %u\n",
1575 ((ticket->t_trans_type <= 0 || 1573 ((ticket->t_trans_type <= 0 ||
1576 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? 1574 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
1577 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), 1575 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
1578 ticket->t_trans_type, 1576 ticket->t_trans_type,
1579 ticket->t_unit_res, 1577 ticket->t_unit_res,
1580 ticket->t_curr_res, 1578 ticket->t_curr_res,
1581 ticket->t_res_arr_sum, ticket->t_res_o_flow, 1579 ticket->t_res_arr_sum, ticket->t_res_o_flow,
1582 ticket->t_res_num_ophdrs, ophdr_spc, 1580 ticket->t_res_num_ophdrs, ophdr_spc,
1583 ticket->t_res_arr_sum + 1581 ticket->t_res_arr_sum +
1584 ticket->t_res_o_flow + ophdr_spc, 1582 ticket->t_res_o_flow + ophdr_spc,
1585 ticket->t_res_num); 1583 ticket->t_res_num);
1586 1584
1587 for (i = 0; i < ticket->t_res_num; i++) { 1585 for (i = 0; i < ticket->t_res_num; i++) {
1588 uint r_type = ticket->t_res_arr[i].r_type; 1586 uint r_type = ticket->t_res_arr[i].r_type;
1589 cmn_err(CE_WARN, 1587 xfs_warn(mp, "region[%u]: %s - %u bytes\n", i,
1590 "region[%u]: %s - %u bytes\n",
1591 i,
1592 ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? 1588 ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
1593 "bad-rtype" : res_type_str[r_type-1]), 1589 "bad-rtype" : res_type_str[r_type-1]),
1594 ticket->t_res_arr[i].r_len); 1590 ticket->t_res_arr[i].r_len);
1595 } 1591 }
1596 1592
1597 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, 1593 xfs_alert_tag(mp, XFS_PTAG_LOGRES,
1598 "xfs_log_write: reservation ran out. Need to up reservation"); 1594 "xfs_log_write: reservation ran out. Need to up reservation");
1599 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1595 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1600} 1596}
@@ -1682,7 +1678,7 @@ xlog_write_setup_ophdr(
1682 case XFS_LOG: 1678 case XFS_LOG:
1683 break; 1679 break;
1684 default: 1680 default:
1685 xfs_fs_cmn_err(CE_WARN, log->l_mp, 1681 xfs_warn(log->l_mp,
1686 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1682 "Bad XFS transaction clientid 0x%x in ticket 0x%p",
1687 ophdr->oh_clientid, ticket); 1683 ophdr->oh_clientid, ticket);
1688 return NULL; 1684 return NULL;
@@ -2264,7 +2260,7 @@ xlog_state_do_callback(
2264 if (repeats > 5000) { 2260 if (repeats > 5000) {
2265 flushcnt += repeats; 2261 flushcnt += repeats;
2266 repeats = 0; 2262 repeats = 0;
2267 xfs_fs_cmn_err(CE_WARN, log->l_mp, 2263 xfs_warn(log->l_mp,
2268 "%s: possible infinite loop (%d iterations)", 2264 "%s: possible infinite loop (%d iterations)",
2269 __func__, flushcnt); 2265 __func__, flushcnt);
2270 } 2266 }
@@ -3052,10 +3048,8 @@ xfs_log_force(
3052 int error; 3048 int error;
3053 3049
3054 error = _xfs_log_force(mp, flags, NULL); 3050 error = _xfs_log_force(mp, flags, NULL);
3055 if (error) { 3051 if (error)
3056 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " 3052 xfs_warn(mp, "%s: error %d returned.", __func__, error);
3057 "error %d returned.", error);
3058 }
3059} 3053}
3060 3054
3061/* 3055/*
@@ -3204,10 +3198,8 @@ xfs_log_force_lsn(
3204 int error; 3198 int error;
3205 3199
3206 error = _xfs_log_force_lsn(mp, lsn, flags, NULL); 3200 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3207 if (error) { 3201 if (error)
3208 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " 3202 xfs_warn(mp, "%s: error %d returned.", __func__, error);
3209 "error %d returned.", error);
3210 }
3211} 3203}
3212 3204
3213/* 3205/*
@@ -3412,7 +3404,7 @@ xlog_verify_dest_ptr(
3412 } 3404 }
3413 3405
3414 if (!good_ptr) 3406 if (!good_ptr)
3415 xlog_panic("xlog_verify_dest_ptr: invalid ptr"); 3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
3416} 3408}
3417 3409
3418STATIC void 3410STATIC void
@@ -3448,16 +3440,16 @@ xlog_verify_tail_lsn(xlog_t *log,
3448 blocks = 3440 blocks =
3449 log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); 3441 log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
3450 if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) 3442 if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize))
3451 xlog_panic("xlog_verify_tail_lsn: ran out of log space"); 3443 xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
3452 } else { 3444 } else {
3453 ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); 3445 ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle);
3454 3446
3455 if (BLOCK_LSN(tail_lsn) == log->l_prev_block) 3447 if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
3456 xlog_panic("xlog_verify_tail_lsn: tail wrapped"); 3448 xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);
3457 3449
3458 blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; 3450 blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
3459 if (blocks < BTOBB(iclog->ic_offset) + 1) 3451 if (blocks < BTOBB(iclog->ic_offset) + 1)
3460 xlog_panic("xlog_verify_tail_lsn: ran out of log space"); 3452 xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
3461 } 3453 }
3462} /* xlog_verify_tail_lsn */ 3454} /* xlog_verify_tail_lsn */
3463 3455
@@ -3497,22 +3489,23 @@ xlog_verify_iclog(xlog_t *log,
3497 icptr = log->l_iclog; 3489 icptr = log->l_iclog;
3498 for (i=0; i < log->l_iclog_bufs; i++) { 3490 for (i=0; i < log->l_iclog_bufs; i++) {
3499 if (icptr == NULL) 3491 if (icptr == NULL)
3500 xlog_panic("xlog_verify_iclog: invalid ptr"); 3492 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
3501 icptr = icptr->ic_next; 3493 icptr = icptr->ic_next;
3502 } 3494 }
3503 if (icptr != log->l_iclog) 3495 if (icptr != log->l_iclog)
3504 xlog_panic("xlog_verify_iclog: corrupt iclog ring"); 3496 xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
3505 spin_unlock(&log->l_icloglock); 3497 spin_unlock(&log->l_icloglock);
3506 3498
3507 /* check log magic numbers */ 3499 /* check log magic numbers */
3508 if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) 3500 if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM)
3509 xlog_panic("xlog_verify_iclog: invalid magic num"); 3501 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
3510 3502
3511 ptr = (xfs_caddr_t) &iclog->ic_header; 3503 ptr = (xfs_caddr_t) &iclog->ic_header;
3512 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; 3504 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
3513 ptr += BBSIZE) { 3505 ptr += BBSIZE) {
3514 if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) 3506 if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
3515 xlog_panic("xlog_verify_iclog: unexpected magic num"); 3507 xfs_emerg(log->l_mp, "%s: unexpected magic num",
3508 __func__);
3516 } 3509 }
3517 3510
3518 /* check fields */ 3511 /* check fields */
@@ -3542,9 +3535,10 @@ xlog_verify_iclog(xlog_t *log,
3542 } 3535 }
3543 } 3536 }
3544 if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) 3537 if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
3545 cmn_err(CE_WARN, "xlog_verify_iclog: " 3538 xfs_warn(log->l_mp,
3546 "invalid clientid %d op 0x%p offset 0x%lx", 3539 "%s: invalid clientid %d op 0x%p offset 0x%lx",
3547 clientid, ophead, (unsigned long)field_offset); 3540 __func__, clientid, ophead,
3541 (unsigned long)field_offset);
3548 3542
3549 /* check length */ 3543 /* check length */
3550 field_offset = (__psint_t) 3544 field_offset = (__psint_t)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index d5f8be8f4bf6..15dbf1f9c2be 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -87,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i)
87 return be32_to_cpu(i) >> 24; 87 return be32_to_cpu(i) >> 24;
88} 88}
89 89
90#define xlog_panic(args...) cmn_err(CE_PANIC, ## args)
91#define xlog_exit(args...) cmn_err(CE_PANIC, ## args)
92#define xlog_warn(args...) cmn_err(CE_WARN, ## args)
93
94/* 90/*
95 * In core log state 91 * In core log state
96 */ 92 */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index aa0ebb776903..0c4a5618e7af 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -92,7 +92,7 @@ xlog_get_bp(
92 int nbblks) 92 int nbblks)
93{ 93{
94 if (!xlog_buf_bbcount_valid(log, nbblks)) { 94 if (!xlog_buf_bbcount_valid(log, nbblks)) {
95 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", 95 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
96 nbblks); 96 nbblks);
97 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 97 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
98 return NULL; 98 return NULL;
@@ -160,7 +160,7 @@ xlog_bread_noalign(
160 int error; 160 int error;
161 161
162 if (!xlog_buf_bbcount_valid(log, nbblks)) { 162 if (!xlog_buf_bbcount_valid(log, nbblks)) {
163 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", 163 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
164 nbblks); 164 nbblks);
165 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 165 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
166 return EFSCORRUPTED; 166 return EFSCORRUPTED;
@@ -219,7 +219,7 @@ xlog_bwrite(
219 int error; 219 int error;
220 220
221 if (!xlog_buf_bbcount_valid(log, nbblks)) { 221 if (!xlog_buf_bbcount_valid(log, nbblks)) {
222 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", 222 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
223 nbblks); 223 nbblks);
224 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 224 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
225 return EFSCORRUPTED; 225 return EFSCORRUPTED;
@@ -254,9 +254,9 @@ xlog_header_check_dump(
254 xfs_mount_t *mp, 254 xfs_mount_t *mp,
255 xlog_rec_header_t *head) 255 xlog_rec_header_t *head)
256{ 256{
257 cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n", 257 xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n",
258 __func__, &mp->m_sb.sb_uuid, XLOG_FMT); 258 __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
259 cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n", 259 xfs_debug(mp, " log : uuid = %pU, fmt = %d\n",
260 &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); 260 &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
261} 261}
262#else 262#else
@@ -279,15 +279,15 @@ xlog_header_check_recover(
279 * a dirty log created in IRIX. 279 * a dirty log created in IRIX.
280 */ 280 */
281 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { 281 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
282 xlog_warn( 282 xfs_warn(mp,
283 "XFS: dirty log written in incompatible format - can't recover"); 283 "dirty log written in incompatible format - can't recover");
284 xlog_header_check_dump(mp, head); 284 xlog_header_check_dump(mp, head);
285 XFS_ERROR_REPORT("xlog_header_check_recover(1)", 285 XFS_ERROR_REPORT("xlog_header_check_recover(1)",
286 XFS_ERRLEVEL_HIGH, mp); 286 XFS_ERRLEVEL_HIGH, mp);
287 return XFS_ERROR(EFSCORRUPTED); 287 return XFS_ERROR(EFSCORRUPTED);
288 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 288 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
289 xlog_warn( 289 xfs_warn(mp,
290 "XFS: dirty log entry has mismatched uuid - can't recover"); 290 "dirty log entry has mismatched uuid - can't recover");
291 xlog_header_check_dump(mp, head); 291 xlog_header_check_dump(mp, head);
292 XFS_ERROR_REPORT("xlog_header_check_recover(2)", 292 XFS_ERROR_REPORT("xlog_header_check_recover(2)",
293 XFS_ERRLEVEL_HIGH, mp); 293 XFS_ERRLEVEL_HIGH, mp);
@@ -312,9 +312,9 @@ xlog_header_check_mount(
312 * h_fs_uuid is nil, we assume this log was last mounted 312 * h_fs_uuid is nil, we assume this log was last mounted
313 * by IRIX and continue. 313 * by IRIX and continue.
314 */ 314 */
315 xlog_warn("XFS: nil uuid in log - IRIX style log"); 315 xfs_warn(mp, "nil uuid in log - IRIX style log");
316 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 316 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
317 xlog_warn("XFS: log has mismatched uuid - can't recover"); 317 xfs_warn(mp, "log has mismatched uuid - can't recover");
318 xlog_header_check_dump(mp, head); 318 xlog_header_check_dump(mp, head);
319 XFS_ERROR_REPORT("xlog_header_check_mount", 319 XFS_ERROR_REPORT("xlog_header_check_mount",
320 XFS_ERRLEVEL_HIGH, mp); 320 XFS_ERRLEVEL_HIGH, mp);
@@ -490,8 +490,8 @@ xlog_find_verify_log_record(
490 for (i = (*last_blk) - 1; i >= 0; i--) { 490 for (i = (*last_blk) - 1; i >= 0; i--) {
491 if (i < start_blk) { 491 if (i < start_blk) {
492 /* valid log record not found */ 492 /* valid log record not found */
493 xlog_warn( 493 xfs_warn(log->l_mp,
494 "XFS: Log inconsistent (didn't find previous header)"); 494 "Log inconsistent (didn't find previous header)");
495 ASSERT(0); 495 ASSERT(0);
496 error = XFS_ERROR(EIO); 496 error = XFS_ERROR(EIO);
497 goto out; 497 goto out;
@@ -591,12 +591,12 @@ xlog_find_head(
591 * mkfs etc write a dummy unmount record to a fresh 591 * mkfs etc write a dummy unmount record to a fresh
592 * log so we can store the uuid in there 592 * log so we can store the uuid in there
593 */ 593 */
594 xlog_warn("XFS: totally zeroed log"); 594 xfs_warn(log->l_mp, "totally zeroed log");
595 } 595 }
596 596
597 return 0; 597 return 0;
598 } else if (error) { 598 } else if (error) {
599 xlog_warn("XFS: empty log check failed"); 599 xfs_warn(log->l_mp, "empty log check failed");
600 return error; 600 return error;
601 } 601 }
602 602
@@ -819,7 +819,7 @@ validate_head:
819 xlog_put_bp(bp); 819 xlog_put_bp(bp);
820 820
821 if (error) 821 if (error)
822 xlog_warn("XFS: failed to find log head"); 822 xfs_warn(log->l_mp, "failed to find log head");
823 return error; 823 return error;
824} 824}
825 825
@@ -912,7 +912,7 @@ xlog_find_tail(
912 } 912 }
913 } 913 }
914 if (!found) { 914 if (!found) {
915 xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); 915 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
916 ASSERT(0); 916 ASSERT(0);
917 return XFS_ERROR(EIO); 917 return XFS_ERROR(EIO);
918 } 918 }
@@ -1028,7 +1028,7 @@ done:
1028 xlog_put_bp(bp); 1028 xlog_put_bp(bp);
1029 1029
1030 if (error) 1030 if (error)
1031 xlog_warn("XFS: failed to locate log tail"); 1031 xfs_warn(log->l_mp, "failed to locate log tail");
1032 return error; 1032 return error;
1033} 1033}
1034 1034
@@ -1092,7 +1092,8 @@ xlog_find_zeroed(
1092 * the first block must be 1. If it's not, maybe we're 1092 * the first block must be 1. If it's not, maybe we're
1093 * not looking at a log... Bail out. 1093 * not looking at a log... Bail out.
1094 */ 1094 */
1095 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); 1095 xfs_warn(log->l_mp,
1096 "Log inconsistent or not a log (last==0, first!=1)");
1096 return XFS_ERROR(EINVAL); 1097 return XFS_ERROR(EINVAL);
1097 } 1098 }
1098 1099
@@ -1506,8 +1507,8 @@ xlog_recover_add_to_trans(
1506 if (list_empty(&trans->r_itemq)) { 1507 if (list_empty(&trans->r_itemq)) {
1507 /* we need to catch log corruptions here */ 1508 /* we need to catch log corruptions here */
1508 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 1509 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1509 xlog_warn("XFS: xlog_recover_add_to_trans: " 1510 xfs_warn(log->l_mp, "%s: bad header magic number",
1510 "bad header magic number"); 1511 __func__);
1511 ASSERT(0); 1512 ASSERT(0);
1512 return XFS_ERROR(EIO); 1513 return XFS_ERROR(EIO);
1513 } 1514 }
@@ -1534,8 +1535,8 @@ xlog_recover_add_to_trans(
1534 if (item->ri_total == 0) { /* first region to be added */ 1535 if (item->ri_total == 0) { /* first region to be added */
1535 if (in_f->ilf_size == 0 || 1536 if (in_f->ilf_size == 0 ||
1536 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { 1537 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
1537 xlog_warn( 1538 xfs_warn(log->l_mp,
1538 "XFS: bad number of regions (%d) in inode log format", 1539 "bad number of regions (%d) in inode log format",
1539 in_f->ilf_size); 1540 in_f->ilf_size);
1540 ASSERT(0); 1541 ASSERT(0);
1541 return XFS_ERROR(EIO); 1542 return XFS_ERROR(EIO);
@@ -1592,8 +1593,9 @@ xlog_recover_reorder_trans(
1592 list_move_tail(&item->ri_list, &trans->r_itemq); 1593 list_move_tail(&item->ri_list, &trans->r_itemq);
1593 break; 1594 break;
1594 default: 1595 default:
1595 xlog_warn( 1596 xfs_warn(log->l_mp,
1596 "XFS: xlog_recover_reorder_trans: unrecognized type of log operation"); 1597 "%s: unrecognized type of log operation",
1598 __func__);
1597 ASSERT(0); 1599 ASSERT(0);
1598 return XFS_ERROR(EIO); 1600 return XFS_ERROR(EIO);
1599 } 1601 }
@@ -1803,8 +1805,9 @@ xlog_recover_do_inode_buffer(
1803 logged_nextp = item->ri_buf[item_index].i_addr + 1805 logged_nextp = item->ri_buf[item_index].i_addr +
1804 next_unlinked_offset - reg_buf_offset; 1806 next_unlinked_offset - reg_buf_offset;
1805 if (unlikely(*logged_nextp == 0)) { 1807 if (unlikely(*logged_nextp == 0)) {
1806 xfs_fs_cmn_err(CE_ALERT, mp, 1808 xfs_alert(mp,
1807 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", 1809 "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). "
1810 "Trying to replay bad (0) inode di_next_unlinked field.",
1808 item, bp); 1811 item, bp);
1809 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", 1812 XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
1810 XFS_ERRLEVEL_LOW, mp); 1813 XFS_ERRLEVEL_LOW, mp);
@@ -1863,17 +1866,17 @@ xlog_recover_do_reg_buffer(
1863 if (buf_f->blf_flags & 1866 if (buf_f->blf_flags &
1864 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 1867 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
1865 if (item->ri_buf[i].i_addr == NULL) { 1868 if (item->ri_buf[i].i_addr == NULL) {
1866 cmn_err(CE_ALERT, 1869 xfs_alert(mp,
1867 "XFS: NULL dquot in %s.", __func__); 1870 "XFS: NULL dquot in %s.", __func__);
1868 goto next; 1871 goto next;
1869 } 1872 }
1870 if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { 1873 if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
1871 cmn_err(CE_ALERT, 1874 xfs_alert(mp,
1872 "XFS: dquot too small (%d) in %s.", 1875 "XFS: dquot too small (%d) in %s.",
1873 item->ri_buf[i].i_len, __func__); 1876 item->ri_buf[i].i_len, __func__);
1874 goto next; 1877 goto next;
1875 } 1878 }
1876 error = xfs_qm_dqcheck(item->ri_buf[i].i_addr, 1879 error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr,
1877 -1, 0, XFS_QMOPT_DOWARN, 1880 -1, 0, XFS_QMOPT_DOWARN,
1878 "dquot_buf_recover"); 1881 "dquot_buf_recover");
1879 if (error) 1882 if (error)
@@ -1898,6 +1901,7 @@ xlog_recover_do_reg_buffer(
1898 */ 1901 */
1899int 1902int
1900xfs_qm_dqcheck( 1903xfs_qm_dqcheck(
1904 struct xfs_mount *mp,
1901 xfs_disk_dquot_t *ddq, 1905 xfs_disk_dquot_t *ddq,
1902 xfs_dqid_t id, 1906 xfs_dqid_t id,
1903 uint type, /* used only when IO_dorepair is true */ 1907 uint type, /* used only when IO_dorepair is true */
@@ -1924,14 +1928,14 @@ xfs_qm_dqcheck(
1924 */ 1928 */
1925 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { 1929 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
1926 if (flags & XFS_QMOPT_DOWARN) 1930 if (flags & XFS_QMOPT_DOWARN)
1927 cmn_err(CE_ALERT, 1931 xfs_alert(mp,
1928 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 1932 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
1929 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 1933 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
1930 errs++; 1934 errs++;
1931 } 1935 }
1932 if (ddq->d_version != XFS_DQUOT_VERSION) { 1936 if (ddq->d_version != XFS_DQUOT_VERSION) {
1933 if (flags & XFS_QMOPT_DOWARN) 1937 if (flags & XFS_QMOPT_DOWARN)
1934 cmn_err(CE_ALERT, 1938 xfs_alert(mp,
1935 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 1939 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
1936 str, id, ddq->d_version, XFS_DQUOT_VERSION); 1940 str, id, ddq->d_version, XFS_DQUOT_VERSION);
1937 errs++; 1941 errs++;
@@ -1941,7 +1945,7 @@ xfs_qm_dqcheck(
1941 ddq->d_flags != XFS_DQ_PROJ && 1945 ddq->d_flags != XFS_DQ_PROJ &&
1942 ddq->d_flags != XFS_DQ_GROUP) { 1946 ddq->d_flags != XFS_DQ_GROUP) {
1943 if (flags & XFS_QMOPT_DOWARN) 1947 if (flags & XFS_QMOPT_DOWARN)
1944 cmn_err(CE_ALERT, 1948 xfs_alert(mp,
1945 "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 1949 "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
1946 str, id, ddq->d_flags); 1950 str, id, ddq->d_flags);
1947 errs++; 1951 errs++;
@@ -1949,7 +1953,7 @@ xfs_qm_dqcheck(
1949 1953
1950 if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 1954 if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
1951 if (flags & XFS_QMOPT_DOWARN) 1955 if (flags & XFS_QMOPT_DOWARN)
1952 cmn_err(CE_ALERT, 1956 xfs_alert(mp,
1953 "%s : ondisk-dquot 0x%p, ID mismatch: " 1957 "%s : ondisk-dquot 0x%p, ID mismatch: "
1954 "0x%x expected, found id 0x%x", 1958 "0x%x expected, found id 0x%x",
1955 str, ddq, id, be32_to_cpu(ddq->d_id)); 1959 str, ddq, id, be32_to_cpu(ddq->d_id));
@@ -1962,9 +1966,8 @@ xfs_qm_dqcheck(
1962 be64_to_cpu(ddq->d_blk_softlimit)) { 1966 be64_to_cpu(ddq->d_blk_softlimit)) {
1963 if (!ddq->d_btimer) { 1967 if (!ddq->d_btimer) {
1964 if (flags & XFS_QMOPT_DOWARN) 1968 if (flags & XFS_QMOPT_DOWARN)
1965 cmn_err(CE_ALERT, 1969 xfs_alert(mp,
1966 "%s : Dquot ID 0x%x (0x%p) " 1970 "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
1967 "BLK TIMER NOT STARTED",
1968 str, (int)be32_to_cpu(ddq->d_id), ddq); 1971 str, (int)be32_to_cpu(ddq->d_id), ddq);
1969 errs++; 1972 errs++;
1970 } 1973 }
@@ -1974,9 +1977,8 @@ xfs_qm_dqcheck(
1974 be64_to_cpu(ddq->d_ino_softlimit)) { 1977 be64_to_cpu(ddq->d_ino_softlimit)) {
1975 if (!ddq->d_itimer) { 1978 if (!ddq->d_itimer) {
1976 if (flags & XFS_QMOPT_DOWARN) 1979 if (flags & XFS_QMOPT_DOWARN)
1977 cmn_err(CE_ALERT, 1980 xfs_alert(mp,
1978 "%s : Dquot ID 0x%x (0x%p) " 1981 "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
1979 "INODE TIMER NOT STARTED",
1980 str, (int)be32_to_cpu(ddq->d_id), ddq); 1982 str, (int)be32_to_cpu(ddq->d_id), ddq);
1981 errs++; 1983 errs++;
1982 } 1984 }
@@ -1986,9 +1988,8 @@ xfs_qm_dqcheck(
1986 be64_to_cpu(ddq->d_rtb_softlimit)) { 1988 be64_to_cpu(ddq->d_rtb_softlimit)) {
1987 if (!ddq->d_rtbtimer) { 1989 if (!ddq->d_rtbtimer) {
1988 if (flags & XFS_QMOPT_DOWARN) 1990 if (flags & XFS_QMOPT_DOWARN)
1989 cmn_err(CE_ALERT, 1991 xfs_alert(mp,
1990 "%s : Dquot ID 0x%x (0x%p) " 1992 "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
1991 "RTBLK TIMER NOT STARTED",
1992 str, (int)be32_to_cpu(ddq->d_id), ddq); 1993 str, (int)be32_to_cpu(ddq->d_id), ddq);
1993 errs++; 1994 errs++;
1994 } 1995 }
@@ -1999,7 +2000,7 @@ xfs_qm_dqcheck(
1999 return errs; 2000 return errs;
2000 2001
2001 if (flags & XFS_QMOPT_DOWARN) 2002 if (flags & XFS_QMOPT_DOWARN)
2002 cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); 2003 xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
2003 2004
2004 /* 2005 /*
2005 * Typically, a repair is only requested by quotacheck. 2006 * Typically, a repair is only requested by quotacheck.
@@ -2218,9 +2219,9 @@ xlog_recover_inode_pass2(
2218 */ 2219 */
2219 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { 2220 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
2220 xfs_buf_relse(bp); 2221 xfs_buf_relse(bp);
2221 xfs_fs_cmn_err(CE_ALERT, mp, 2222 xfs_alert(mp,
2222 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", 2223 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
2223 dip, bp, in_f->ilf_ino); 2224 __func__, dip, bp, in_f->ilf_ino);
2224 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 2225 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
2225 XFS_ERRLEVEL_LOW, mp); 2226 XFS_ERRLEVEL_LOW, mp);
2226 error = EFSCORRUPTED; 2227 error = EFSCORRUPTED;
@@ -2229,9 +2230,9 @@ xlog_recover_inode_pass2(
2229 dicp = item->ri_buf[1].i_addr; 2230 dicp = item->ri_buf[1].i_addr;
2230 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2231 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
2231 xfs_buf_relse(bp); 2232 xfs_buf_relse(bp);
2232 xfs_fs_cmn_err(CE_ALERT, mp, 2233 xfs_alert(mp,
2233 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", 2234 "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
2234 item, in_f->ilf_ino); 2235 __func__, item, in_f->ilf_ino);
2235 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 2236 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
2236 XFS_ERRLEVEL_LOW, mp); 2237 XFS_ERRLEVEL_LOW, mp);
2237 error = EFSCORRUPTED; 2238 error = EFSCORRUPTED;
@@ -2263,9 +2264,10 @@ xlog_recover_inode_pass2(
2263 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 2264 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
2264 XFS_ERRLEVEL_LOW, mp, dicp); 2265 XFS_ERRLEVEL_LOW, mp, dicp);
2265 xfs_buf_relse(bp); 2266 xfs_buf_relse(bp);
2266 xfs_fs_cmn_err(CE_ALERT, mp, 2267 xfs_alert(mp,
2267 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2268 "%s: Bad regular inode log record, rec ptr 0x%p, "
2268 item, dip, bp, in_f->ilf_ino); 2269 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2270 __func__, item, dip, bp, in_f->ilf_ino);
2269 error = EFSCORRUPTED; 2271 error = EFSCORRUPTED;
2270 goto error; 2272 goto error;
2271 } 2273 }
@@ -2276,9 +2278,10 @@ xlog_recover_inode_pass2(
2276 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 2278 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
2277 XFS_ERRLEVEL_LOW, mp, dicp); 2279 XFS_ERRLEVEL_LOW, mp, dicp);
2278 xfs_buf_relse(bp); 2280 xfs_buf_relse(bp);
2279 xfs_fs_cmn_err(CE_ALERT, mp, 2281 xfs_alert(mp,
2280 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2282 "%s: Bad dir inode log record, rec ptr 0x%p, "
2281 item, dip, bp, in_f->ilf_ino); 2283 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2284 __func__, item, dip, bp, in_f->ilf_ino);
2282 error = EFSCORRUPTED; 2285 error = EFSCORRUPTED;
2283 goto error; 2286 goto error;
2284 } 2287 }
@@ -2287,9 +2290,10 @@ xlog_recover_inode_pass2(
2287 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 2290 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
2288 XFS_ERRLEVEL_LOW, mp, dicp); 2291 XFS_ERRLEVEL_LOW, mp, dicp);
2289 xfs_buf_relse(bp); 2292 xfs_buf_relse(bp);
2290 xfs_fs_cmn_err(CE_ALERT, mp, 2293 xfs_alert(mp,
2291 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 2294 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
2292 item, dip, bp, in_f->ilf_ino, 2295 "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
2296 __func__, item, dip, bp, in_f->ilf_ino,
2293 dicp->di_nextents + dicp->di_anextents, 2297 dicp->di_nextents + dicp->di_anextents,
2294 dicp->di_nblocks); 2298 dicp->di_nblocks);
2295 error = EFSCORRUPTED; 2299 error = EFSCORRUPTED;
@@ -2299,8 +2303,9 @@ xlog_recover_inode_pass2(
2299 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 2303 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
2300 XFS_ERRLEVEL_LOW, mp, dicp); 2304 XFS_ERRLEVEL_LOW, mp, dicp);
2301 xfs_buf_relse(bp); 2305 xfs_buf_relse(bp);
2302 xfs_fs_cmn_err(CE_ALERT, mp, 2306 xfs_alert(mp,
2303 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", 2307 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
2308 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
2304 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); 2309 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
2305 error = EFSCORRUPTED; 2310 error = EFSCORRUPTED;
2306 goto error; 2311 goto error;
@@ -2309,9 +2314,9 @@ xlog_recover_inode_pass2(
2309 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 2314 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
2310 XFS_ERRLEVEL_LOW, mp, dicp); 2315 XFS_ERRLEVEL_LOW, mp, dicp);
2311 xfs_buf_relse(bp); 2316 xfs_buf_relse(bp);
2312 xfs_fs_cmn_err(CE_ALERT, mp, 2317 xfs_alert(mp,
2313 "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", 2318 "%s: Bad inode log record length %d, rec ptr 0x%p",
2314 item->ri_buf[1].i_len, item); 2319 __func__, item->ri_buf[1].i_len, item);
2315 error = EFSCORRUPTED; 2320 error = EFSCORRUPTED;
2316 goto error; 2321 goto error;
2317 } 2322 }
@@ -2398,7 +2403,7 @@ xlog_recover_inode_pass2(
2398 break; 2403 break;
2399 2404
2400 default: 2405 default:
2401 xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag"); 2406 xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
2402 ASSERT(0); 2407 ASSERT(0);
2403 xfs_buf_relse(bp); 2408 xfs_buf_relse(bp);
2404 error = EIO; 2409 error = EIO;
@@ -2467,13 +2472,11 @@ xlog_recover_dquot_pass2(
2467 2472
2468 recddq = item->ri_buf[1].i_addr; 2473 recddq = item->ri_buf[1].i_addr;
2469 if (recddq == NULL) { 2474 if (recddq == NULL) {
2470 cmn_err(CE_ALERT, 2475 xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
2471 "XFS: NULL dquot in %s.", __func__);
2472 return XFS_ERROR(EIO); 2476 return XFS_ERROR(EIO);
2473 } 2477 }
2474 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { 2478 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
2475 cmn_err(CE_ALERT, 2479 xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
2476 "XFS: dquot too small (%d) in %s.",
2477 item->ri_buf[1].i_len, __func__); 2480 item->ri_buf[1].i_len, __func__);
2478 return XFS_ERROR(EIO); 2481 return XFS_ERROR(EIO);
2479 } 2482 }
@@ -2498,12 +2501,10 @@ xlog_recover_dquot_pass2(
2498 */ 2501 */
2499 dq_f = item->ri_buf[0].i_addr; 2502 dq_f = item->ri_buf[0].i_addr;
2500 ASSERT(dq_f); 2503 ASSERT(dq_f);
2501 if ((error = xfs_qm_dqcheck(recddq, 2504 error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2502 dq_f->qlf_id, 2505 "xlog_recover_dquot_pass2 (log copy)");
2503 0, XFS_QMOPT_DOWARN, 2506 if (error)
2504 "xlog_recover_dquot_pass2 (log copy)"))) {
2505 return XFS_ERROR(EIO); 2507 return XFS_ERROR(EIO);
2506 }
2507 ASSERT(dq_f->qlf_len == 1); 2508 ASSERT(dq_f->qlf_len == 1);
2508 2509
2509 error = xfs_read_buf(mp, mp->m_ddev_targp, 2510 error = xfs_read_buf(mp, mp->m_ddev_targp,
@@ -2523,8 +2524,9 @@ xlog_recover_dquot_pass2(
2523 * was among a chunk of dquots created earlier, and we did some 2524 * was among a chunk of dquots created earlier, and we did some
2524 * minimal initialization then. 2525 * minimal initialization then.
2525 */ 2526 */
2526 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2527 error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2527 "xlog_recover_dquot_pass2")) { 2528 "xlog_recover_dquot_pass2");
2529 if (error) {
2528 xfs_buf_relse(bp); 2530 xfs_buf_relse(bp);
2529 return XFS_ERROR(EIO); 2531 return XFS_ERROR(EIO);
2530 } 2532 }
@@ -2676,9 +2678,8 @@ xlog_recover_commit_pass1(
2676 /* nothing to do in pass 1 */ 2678 /* nothing to do in pass 1 */
2677 return 0; 2679 return 0;
2678 default: 2680 default:
2679 xlog_warn( 2681 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
2680 "XFS: invalid item type (%d) xlog_recover_commit_pass1", 2682 __func__, ITEM_TYPE(item));
2681 ITEM_TYPE(item));
2682 ASSERT(0); 2683 ASSERT(0);
2683 return XFS_ERROR(EIO); 2684 return XFS_ERROR(EIO);
2684 } 2685 }
@@ -2707,9 +2708,8 @@ xlog_recover_commit_pass2(
2707 /* nothing to do in pass2 */ 2708 /* nothing to do in pass2 */
2708 return 0; 2709 return 0;
2709 default: 2710 default:
2710 xlog_warn( 2711 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
2711 "XFS: invalid item type (%d) xlog_recover_commit_pass2", 2712 __func__, ITEM_TYPE(item));
2712 ITEM_TYPE(item));
2713 ASSERT(0); 2713 ASSERT(0);
2714 return XFS_ERROR(EIO); 2714 return XFS_ERROR(EIO);
2715 } 2715 }
@@ -2751,10 +2751,11 @@ xlog_recover_commit_trans(
2751 2751
2752STATIC int 2752STATIC int
2753xlog_recover_unmount_trans( 2753xlog_recover_unmount_trans(
2754 struct log *log,
2754 xlog_recover_t *trans) 2755 xlog_recover_t *trans)
2755{ 2756{
2756 /* Do nothing now */ 2757 /* Do nothing now */
2757 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); 2758 xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
2758 return 0; 2759 return 0;
2759} 2760}
2760 2761
@@ -2797,8 +2798,8 @@ xlog_recover_process_data(
2797 dp += sizeof(xlog_op_header_t); 2798 dp += sizeof(xlog_op_header_t);
2798 if (ohead->oh_clientid != XFS_TRANSACTION && 2799 if (ohead->oh_clientid != XFS_TRANSACTION &&
2799 ohead->oh_clientid != XFS_LOG) { 2800 ohead->oh_clientid != XFS_LOG) {
2800 xlog_warn( 2801 xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
2801 "XFS: xlog_recover_process_data: bad clientid"); 2802 __func__, ohead->oh_clientid);
2802 ASSERT(0); 2803 ASSERT(0);
2803 return (XFS_ERROR(EIO)); 2804 return (XFS_ERROR(EIO));
2804 } 2805 }
@@ -2811,8 +2812,8 @@ xlog_recover_process_data(
2811 be64_to_cpu(rhead->h_lsn)); 2812 be64_to_cpu(rhead->h_lsn));
2812 } else { 2813 } else {
2813 if (dp + be32_to_cpu(ohead->oh_len) > lp) { 2814 if (dp + be32_to_cpu(ohead->oh_len) > lp) {
2814 xlog_warn( 2815 xfs_warn(log->l_mp, "%s: bad length 0x%x",
2815 "XFS: xlog_recover_process_data: bad length"); 2816 __func__, be32_to_cpu(ohead->oh_len));
2816 WARN_ON(1); 2817 WARN_ON(1);
2817 return (XFS_ERROR(EIO)); 2818 return (XFS_ERROR(EIO));
2818 } 2819 }
@@ -2825,7 +2826,7 @@ xlog_recover_process_data(
2825 trans, pass); 2826 trans, pass);
2826 break; 2827 break;
2827 case XLOG_UNMOUNT_TRANS: 2828 case XLOG_UNMOUNT_TRANS:
2828 error = xlog_recover_unmount_trans(trans); 2829 error = xlog_recover_unmount_trans(log, trans);
2829 break; 2830 break;
2830 case XLOG_WAS_CONT_TRANS: 2831 case XLOG_WAS_CONT_TRANS:
2831 error = xlog_recover_add_to_cont_trans(log, 2832 error = xlog_recover_add_to_cont_trans(log,
@@ -2833,8 +2834,8 @@ xlog_recover_process_data(
2833 be32_to_cpu(ohead->oh_len)); 2834 be32_to_cpu(ohead->oh_len));
2834 break; 2835 break;
2835 case XLOG_START_TRANS: 2836 case XLOG_START_TRANS:
2836 xlog_warn( 2837 xfs_warn(log->l_mp, "%s: bad transaction",
2837 "XFS: xlog_recover_process_data: bad transaction"); 2838 __func__);
2838 ASSERT(0); 2839 ASSERT(0);
2839 error = XFS_ERROR(EIO); 2840 error = XFS_ERROR(EIO);
2840 break; 2841 break;
@@ -2844,8 +2845,8 @@ xlog_recover_process_data(
2844 dp, be32_to_cpu(ohead->oh_len)); 2845 dp, be32_to_cpu(ohead->oh_len));
2845 break; 2846 break;
2846 default: 2847 default:
2847 xlog_warn( 2848 xfs_warn(log->l_mp, "%s: bad flag 0x%x",
2848 "XFS: xlog_recover_process_data: bad flag"); 2849 __func__, flags);
2849 ASSERT(0); 2850 ASSERT(0);
2850 error = XFS_ERROR(EIO); 2851 error = XFS_ERROR(EIO);
2851 break; 2852 break;
@@ -3030,8 +3031,7 @@ xlog_recover_clear_agi_bucket(
3030out_abort: 3031out_abort:
3031 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3032 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
3032out_error: 3033out_error:
3033 xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " 3034 xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
3034 "failed to clear agi %d. Continuing.", agno);
3035 return; 3035 return;
3036} 3036}
3037 3037
@@ -3282,7 +3282,7 @@ xlog_valid_rec_header(
3282 if (unlikely( 3282 if (unlikely(
3283 (!rhead->h_version || 3283 (!rhead->h_version ||
3284 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 3284 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
3285 xlog_warn("XFS: %s: unrecognised log version (%d).", 3285 xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
3286 __func__, be32_to_cpu(rhead->h_version)); 3286 __func__, be32_to_cpu(rhead->h_version));
3287 return XFS_ERROR(EIO); 3287 return XFS_ERROR(EIO);
3288 } 3288 }
@@ -3740,10 +3740,9 @@ xlog_recover(
3740 return error; 3740 return error;
3741 } 3741 }
3742 3742
3743 cmn_err(CE_NOTE, 3743 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
3744 "Starting XFS recovery on filesystem: %s (logdev: %s)", 3744 log->l_mp->m_logname ? log->l_mp->m_logname
3745 log->l_mp->m_fsname, log->l_mp->m_logname ? 3745 : "internal");
3746 log->l_mp->m_logname : "internal");
3747 3746
3748 error = xlog_do_recover(log, head_blk, tail_blk); 3747 error = xlog_do_recover(log, head_blk, tail_blk);
3749 log->l_flags |= XLOG_RECOVERY_NEEDED; 3748 log->l_flags |= XLOG_RECOVERY_NEEDED;
@@ -3776,9 +3775,7 @@ xlog_recover_finish(
3776 int error; 3775 int error;
3777 error = xlog_recover_process_efis(log); 3776 error = xlog_recover_process_efis(log);
3778 if (error) { 3777 if (error) {
3779 cmn_err(CE_ALERT, 3778 xfs_alert(log->l_mp, "Failed to recover EFIs");
3780 "Failed to recover EFIs on filesystem: %s",
3781 log->l_mp->m_fsname);
3782 return error; 3779 return error;
3783 } 3780 }
3784 /* 3781 /*
@@ -3793,15 +3790,12 @@ xlog_recover_finish(
3793 3790
3794 xlog_recover_check_summary(log); 3791 xlog_recover_check_summary(log);
3795 3792
3796 cmn_err(CE_NOTE, 3793 xfs_notice(log->l_mp, "Ending recovery (logdev: %s)",
3797 "Ending XFS recovery on filesystem: %s (logdev: %s)", 3794 log->l_mp->m_logname ? log->l_mp->m_logname
3798 log->l_mp->m_fsname, log->l_mp->m_logname ? 3795 : "internal");
3799 log->l_mp->m_logname : "internal");
3800 log->l_flags &= ~XLOG_RECOVERY_NEEDED; 3796 log->l_flags &= ~XLOG_RECOVERY_NEEDED;
3801 } else { 3797 } else {
3802 cmn_err(CE_DEBUG, 3798 xfs_info(log->l_mp, "Ending clean mount");
3803 "Ending clean XFS mount for filesystem: %s\n",
3804 log->l_mp->m_fsname);
3805 } 3799 }
3806 return 0; 3800 return 0;
3807} 3801}
@@ -3834,10 +3828,8 @@ xlog_recover_check_summary(
3834 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3828 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
3835 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); 3829 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
3836 if (error) { 3830 if (error) {
3837 xfs_fs_cmn_err(CE_ALERT, mp, 3831 xfs_alert(mp, "%s agf read failed agno %d error %d",
3838 "xlog_recover_check_summary(agf)" 3832 __func__, agno, error);
3839 "agf read failed agno %d error %d",
3840 agno, error);
3841 } else { 3833 } else {
3842 agfp = XFS_BUF_TO_AGF(agfbp); 3834 agfp = XFS_BUF_TO_AGF(agfbp);
3843 freeblks += be32_to_cpu(agfp->agf_freeblks) + 3835 freeblks += be32_to_cpu(agfp->agf_freeblks) +
@@ -3846,7 +3838,10 @@ xlog_recover_check_summary(
3846 } 3838 }
3847 3839
3848 error = xfs_read_agi(mp, NULL, agno, &agibp); 3840 error = xfs_read_agi(mp, NULL, agno, &agibp);
3849 if (!error) { 3841 if (error) {
3842 xfs_alert(mp, "%s agi read failed agno %d error %d",
3843 __func__, agno, error);
3844 } else {
3850 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); 3845 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp);
3851 3846
3852 itotal += be32_to_cpu(agi->agi_count); 3847 itotal += be32_to_cpu(agi->agi_count);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d447aef84bc3..bb3f9a7b24ed 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -133,9 +133,7 @@ xfs_uuid_mount(
133 return 0; 133 return 0;
134 134
135 if (uuid_is_nil(uuid)) { 135 if (uuid_is_nil(uuid)) {
136 cmn_err(CE_WARN, 136 xfs_warn(mp, "Filesystem has nil UUID - can't mount");
137 "XFS: Filesystem %s has nil UUID - can't mount",
138 mp->m_fsname);
139 return XFS_ERROR(EINVAL); 137 return XFS_ERROR(EINVAL);
140 } 138 }
141 139
@@ -163,8 +161,7 @@ xfs_uuid_mount(
163 161
164 out_duplicate: 162 out_duplicate:
165 mutex_unlock(&xfs_uuid_table_mutex); 163 mutex_unlock(&xfs_uuid_table_mutex);
166 cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount", 164 xfs_warn(mp, "Filesystem has duplicate UUID - can't mount");
167 mp->m_fsname);
168 return XFS_ERROR(EINVAL); 165 return XFS_ERROR(EINVAL);
169} 166}
170 167
@@ -311,6 +308,8 @@ xfs_mount_validate_sb(
311 xfs_sb_t *sbp, 308 xfs_sb_t *sbp,
312 int flags) 309 int flags)
313{ 310{
311 int loud = !(flags & XFS_MFSI_QUIET);
312
314 /* 313 /*
315 * If the log device and data device have the 314 * If the log device and data device have the
316 * same device number, the log is internal. 315 * same device number, the log is internal.
@@ -319,28 +318,32 @@ xfs_mount_validate_sb(
319 * a volume filesystem in a non-volume manner. 318 * a volume filesystem in a non-volume manner.
320 */ 319 */
321 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 320 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
322 xfs_fs_mount_cmn_err(flags, "bad magic number"); 321 if (loud)
322 xfs_warn(mp, "bad magic number");
323 return XFS_ERROR(EWRONGFS); 323 return XFS_ERROR(EWRONGFS);
324 } 324 }
325 325
326 if (!xfs_sb_good_version(sbp)) { 326 if (!xfs_sb_good_version(sbp)) {
327 xfs_fs_mount_cmn_err(flags, "bad version"); 327 if (loud)
328 xfs_warn(mp, "bad version");
328 return XFS_ERROR(EWRONGFS); 329 return XFS_ERROR(EWRONGFS);
329 } 330 }
330 331
331 if (unlikely( 332 if (unlikely(
332 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 333 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
333 xfs_fs_mount_cmn_err(flags, 334 if (loud)
334 "filesystem is marked as having an external log; " 335 xfs_warn(mp,
335 "specify logdev on the\nmount command line."); 336 "filesystem is marked as having an external log; "
337 "specify logdev on the mount command line.");
336 return XFS_ERROR(EINVAL); 338 return XFS_ERROR(EINVAL);
337 } 339 }
338 340
339 if (unlikely( 341 if (unlikely(
340 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { 342 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
341 xfs_fs_mount_cmn_err(flags, 343 if (loud)
342 "filesystem is marked as having an internal log; " 344 xfs_warn(mp,
343 "do not specify logdev on\nthe mount command line."); 345 "filesystem is marked as having an internal log; "
346 "do not specify logdev on the mount command line.");
344 return XFS_ERROR(EINVAL); 347 return XFS_ERROR(EINVAL);
345 } 348 }
346 349
@@ -369,7 +372,8 @@ xfs_mount_validate_sb(
369 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 372 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
370 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 373 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
371 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { 374 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
372 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 375 if (loud)
376 xfs_warn(mp, "SB sanity check 1 failed");
373 return XFS_ERROR(EFSCORRUPTED); 377 return XFS_ERROR(EFSCORRUPTED);
374 } 378 }
375 379
@@ -382,7 +386,8 @@ xfs_mount_validate_sb(
382 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || 386 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
383 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * 387 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
384 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { 388 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
385 xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); 389 if (loud)
390 xfs_warn(mp, "SB sanity check 2 failed");
386 return XFS_ERROR(EFSCORRUPTED); 391 return XFS_ERROR(EFSCORRUPTED);
387 } 392 }
388 393
@@ -390,12 +395,12 @@ xfs_mount_validate_sb(
390 * Until this is fixed only page-sized or smaller data blocks work. 395 * Until this is fixed only page-sized or smaller data blocks work.
391 */ 396 */
392 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { 397 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
393 xfs_fs_mount_cmn_err(flags, 398 if (loud) {
394 "file system with blocksize %d bytes", 399 xfs_warn(mp,
395 sbp->sb_blocksize); 400 "File system with blocksize %d bytes. "
396 xfs_fs_mount_cmn_err(flags, 401 "Only pagesize (%ld) or less will currently work.",
397 "only pagesize (%ld) or less will currently work.", 402 sbp->sb_blocksize, PAGE_SIZE);
398 PAGE_SIZE); 403 }
399 return XFS_ERROR(ENOSYS); 404 return XFS_ERROR(ENOSYS);
400 } 405 }
401 406
@@ -409,21 +414,23 @@ xfs_mount_validate_sb(
409 case 2048: 414 case 2048:
410 break; 415 break;
411 default: 416 default:
412 xfs_fs_mount_cmn_err(flags, 417 if (loud)
413 "inode size of %d bytes not supported", 418 xfs_warn(mp, "inode size of %d bytes not supported",
414 sbp->sb_inodesize); 419 sbp->sb_inodesize);
415 return XFS_ERROR(ENOSYS); 420 return XFS_ERROR(ENOSYS);
416 } 421 }
417 422
418 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 423 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
419 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 424 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
420 xfs_fs_mount_cmn_err(flags, 425 if (loud)
421 "file system too large to be mounted on this system."); 426 xfs_warn(mp,
427 "file system too large to be mounted on this system.");
422 return XFS_ERROR(EFBIG); 428 return XFS_ERROR(EFBIG);
423 } 429 }
424 430
425 if (unlikely(sbp->sb_inprogress)) { 431 if (unlikely(sbp->sb_inprogress)) {
426 xfs_fs_mount_cmn_err(flags, "file system busy"); 432 if (loud)
433 xfs_warn(mp, "file system busy");
427 return XFS_ERROR(EFSCORRUPTED); 434 return XFS_ERROR(EFSCORRUPTED);
428 } 435 }
429 436
@@ -431,8 +438,9 @@ xfs_mount_validate_sb(
431 * Version 1 directory format has never worked on Linux. 438 * Version 1 directory format has never worked on Linux.
432 */ 439 */
433 if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { 440 if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
434 xfs_fs_mount_cmn_err(flags, 441 if (loud)
435 "file system using version 1 directory format"); 442 xfs_warn(mp,
443 "file system using version 1 directory format");
436 return XFS_ERROR(ENOSYS); 444 return XFS_ERROR(ENOSYS);
437 } 445 }
438 446
@@ -673,6 +681,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
673 unsigned int sector_size; 681 unsigned int sector_size;
674 xfs_buf_t *bp; 682 xfs_buf_t *bp;
675 int error; 683 int error;
684 int loud = !(flags & XFS_MFSI_QUIET);
676 685
677 ASSERT(mp->m_sb_bp == NULL); 686 ASSERT(mp->m_sb_bp == NULL);
678 ASSERT(mp->m_ddev_targp != NULL); 687 ASSERT(mp->m_ddev_targp != NULL);
@@ -688,7 +697,8 @@ reread:
688 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, 697 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
689 XFS_SB_DADDR, sector_size, 0); 698 XFS_SB_DADDR, sector_size, 0);
690 if (!bp) { 699 if (!bp) {
691 xfs_fs_mount_cmn_err(flags, "SB buffer read failed"); 700 if (loud)
701 xfs_warn(mp, "SB buffer read failed");
692 return EIO; 702 return EIO;
693 } 703 }
694 704
@@ -699,7 +709,8 @@ reread:
699 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 709 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
700 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 710 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
701 if (error) { 711 if (error) {
702 xfs_fs_mount_cmn_err(flags, "SB validate failed"); 712 if (loud)
713 xfs_warn(mp, "SB validate failed");
703 goto release_buf; 714 goto release_buf;
704 } 715 }
705 716
@@ -707,9 +718,9 @@ reread:
707 * We must be able to do sector-sized and sector-aligned IO. 718 * We must be able to do sector-sized and sector-aligned IO.
708 */ 719 */
709 if (sector_size > mp->m_sb.sb_sectsize) { 720 if (sector_size > mp->m_sb.sb_sectsize) {
710 xfs_fs_mount_cmn_err(flags, 721 if (loud)
711 "device supports only %u byte sectors (not %u)", 722 xfs_warn(mp, "device supports %u byte sectors (not %u)",
712 sector_size, mp->m_sb.sb_sectsize); 723 sector_size, mp->m_sb.sb_sectsize);
713 error = ENOSYS; 724 error = ENOSYS;
714 goto release_buf; 725 goto release_buf;
715 } 726 }
@@ -853,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp)
853 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 864 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
854 (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 865 (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
855 if (mp->m_flags & XFS_MOUNT_RETERR) { 866 if (mp->m_flags & XFS_MOUNT_RETERR) {
856 cmn_err(CE_WARN, 867 xfs_warn(mp, "alignment check 1 failed");
857 "XFS: alignment check 1 failed");
858 return XFS_ERROR(EINVAL); 868 return XFS_ERROR(EINVAL);
859 } 869 }
860 mp->m_dalign = mp->m_swidth = 0; 870 mp->m_dalign = mp->m_swidth = 0;
@@ -867,8 +877,9 @@ xfs_update_alignment(xfs_mount_t *mp)
867 if (mp->m_flags & XFS_MOUNT_RETERR) { 877 if (mp->m_flags & XFS_MOUNT_RETERR) {
868 return XFS_ERROR(EINVAL); 878 return XFS_ERROR(EINVAL);
869 } 879 }
870 xfs_fs_cmn_err(CE_WARN, mp, 880 xfs_warn(mp,
871"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", 881 "stripe alignment turned off: sunit(%d)/swidth(%d) "
882 "incompatible with agsize(%d)",
872 mp->m_dalign, mp->m_swidth, 883 mp->m_dalign, mp->m_swidth,
873 sbp->sb_agblocks); 884 sbp->sb_agblocks);
874 885
@@ -878,9 +889,9 @@ xfs_update_alignment(xfs_mount_t *mp)
878 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 889 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
879 } else { 890 } else {
880 if (mp->m_flags & XFS_MOUNT_RETERR) { 891 if (mp->m_flags & XFS_MOUNT_RETERR) {
881 xfs_fs_cmn_err(CE_WARN, mp, 892 xfs_warn(mp,
882"stripe alignment turned off: sunit(%d) less than bsize(%d)", 893 "stripe alignment turned off: sunit(%d) less than bsize(%d)",
883 mp->m_dalign, 894 mp->m_dalign,
884 mp->m_blockmask +1); 895 mp->m_blockmask +1);
885 return XFS_ERROR(EINVAL); 896 return XFS_ERROR(EINVAL);
886 } 897 }
@@ -1026,14 +1037,14 @@ xfs_check_sizes(xfs_mount_t *mp)
1026 1037
1027 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 1038 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
1028 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 1039 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
1029 cmn_err(CE_WARN, "XFS: filesystem size mismatch detected"); 1040 xfs_warn(mp, "filesystem size mismatch detected");
1030 return XFS_ERROR(EFBIG); 1041 return XFS_ERROR(EFBIG);
1031 } 1042 }
1032 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, 1043 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
1033 d - XFS_FSS_TO_BB(mp, 1), 1044 d - XFS_FSS_TO_BB(mp, 1),
1034 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); 1045 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
1035 if (!bp) { 1046 if (!bp) {
1036 cmn_err(CE_WARN, "XFS: last sector read failed"); 1047 xfs_warn(mp, "last sector read failed");
1037 return EIO; 1048 return EIO;
1038 } 1049 }
1039 xfs_buf_relse(bp); 1050 xfs_buf_relse(bp);
@@ -1041,14 +1052,14 @@ xfs_check_sizes(xfs_mount_t *mp)
1041 if (mp->m_logdev_targp != mp->m_ddev_targp) { 1052 if (mp->m_logdev_targp != mp->m_ddev_targp) {
1042 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 1053 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
1043 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 1054 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
1044 cmn_err(CE_WARN, "XFS: log size mismatch detected"); 1055 xfs_warn(mp, "log size mismatch detected");
1045 return XFS_ERROR(EFBIG); 1056 return XFS_ERROR(EFBIG);
1046 } 1057 }
1047 bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, 1058 bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
1048 d - XFS_FSB_TO_BB(mp, 1), 1059 d - XFS_FSB_TO_BB(mp, 1),
1049 XFS_FSB_TO_B(mp, 1), 0); 1060 XFS_FSB_TO_B(mp, 1), 0);
1050 if (!bp) { 1061 if (!bp) {
1051 cmn_err(CE_WARN, "XFS: log device read failed"); 1062 xfs_warn(mp, "log device read failed");
1052 return EIO; 1063 return EIO;
1053 } 1064 }
1054 xfs_buf_relse(bp); 1065 xfs_buf_relse(bp);
@@ -1086,7 +1097,7 @@ xfs_mount_reset_sbqflags(
1086 return 0; 1097 return 0;
1087 1098
1088#ifdef QUOTADEBUG 1099#ifdef QUOTADEBUG
1089 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); 1100 xfs_notice(mp, "Writing superblock quota changes");
1090#endif 1101#endif
1091 1102
1092 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1103 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
@@ -1094,8 +1105,7 @@ xfs_mount_reset_sbqflags(
1094 XFS_DEFAULT_LOG_COUNT); 1105 XFS_DEFAULT_LOG_COUNT);
1095 if (error) { 1106 if (error) {
1096 xfs_trans_cancel(tp, 0); 1107 xfs_trans_cancel(tp, 0);
1097 xfs_fs_cmn_err(CE_ALERT, mp, 1108 xfs_alert(mp, "%s: Superblock update failed!", __func__);
1098 "xfs_mount_reset_sbqflags: Superblock update failed!");
1099 return error; 1109 return error;
1100 } 1110 }
1101 1111
@@ -1161,8 +1171,7 @@ xfs_mountfs(
1161 * transaction subsystem is online. 1171 * transaction subsystem is online.
1162 */ 1172 */
1163 if (xfs_sb_has_mismatched_features2(sbp)) { 1173 if (xfs_sb_has_mismatched_features2(sbp)) {
1164 cmn_err(CE_WARN, 1174 xfs_warn(mp, "correcting sb_features alignment problem");
1165 "XFS: correcting sb_features alignment problem");
1166 sbp->sb_features2 |= sbp->sb_bad_features2; 1175 sbp->sb_features2 |= sbp->sb_bad_features2;
1167 sbp->sb_bad_features2 = sbp->sb_features2; 1176 sbp->sb_bad_features2 = sbp->sb_features2;
1168 mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; 1177 mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;
@@ -1241,7 +1250,7 @@ xfs_mountfs(
1241 */ 1250 */
1242 error = xfs_rtmount_init(mp); 1251 error = xfs_rtmount_init(mp);
1243 if (error) { 1252 if (error) {
1244 cmn_err(CE_WARN, "XFS: RT mount failed"); 1253 xfs_warn(mp, "RT mount failed");
1245 goto out_remove_uuid; 1254 goto out_remove_uuid;
1246 } 1255 }
1247 1256
@@ -1272,12 +1281,12 @@ xfs_mountfs(
1272 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); 1281 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1273 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); 1282 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1274 if (error) { 1283 if (error) {
1275 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); 1284 xfs_warn(mp, "Failed per-ag init: %d", error);
1276 goto out_remove_uuid; 1285 goto out_remove_uuid;
1277 } 1286 }
1278 1287
1279 if (!sbp->sb_logblocks) { 1288 if (!sbp->sb_logblocks) {
1280 cmn_err(CE_WARN, "XFS: no log defined"); 1289 xfs_warn(mp, "no log defined");
1281 XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); 1290 XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
1282 error = XFS_ERROR(EFSCORRUPTED); 1291 error = XFS_ERROR(EFSCORRUPTED);
1283 goto out_free_perag; 1292 goto out_free_perag;
@@ -1290,7 +1299,7 @@ xfs_mountfs(
1290 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), 1299 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
1291 XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); 1300 XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
1292 if (error) { 1301 if (error) {
1293 cmn_err(CE_WARN, "XFS: log mount failed"); 1302 xfs_warn(mp, "log mount failed");
1294 goto out_free_perag; 1303 goto out_free_perag;
1295 } 1304 }
1296 1305
@@ -1327,16 +1336,14 @@ xfs_mountfs(
1327 */ 1336 */
1328 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); 1337 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
1329 if (error) { 1338 if (error) {
1330 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1339 xfs_warn(mp, "failed to read root inode");
1331 goto out_log_dealloc; 1340 goto out_log_dealloc;
1332 } 1341 }
1333 1342
1334 ASSERT(rip != NULL); 1343 ASSERT(rip != NULL);
1335 1344
1336 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1345 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
1337 cmn_err(CE_WARN, "XFS: corrupted root inode"); 1346 xfs_warn(mp, "corrupted root inode %llu: not a directory",
1338 cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
1339 XFS_BUFTARG_NAME(mp->m_ddev_targp),
1340 (unsigned long long)rip->i_ino); 1347 (unsigned long long)rip->i_ino);
1341 xfs_iunlock(rip, XFS_ILOCK_EXCL); 1348 xfs_iunlock(rip, XFS_ILOCK_EXCL);
1342 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 1349 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
@@ -1356,7 +1363,7 @@ xfs_mountfs(
1356 /* 1363 /*
1357 * Free up the root inode. 1364 * Free up the root inode.
1358 */ 1365 */
1359 cmn_err(CE_WARN, "XFS: failed to read RT inodes"); 1366 xfs_warn(mp, "failed to read RT inodes");
1360 goto out_rele_rip; 1367 goto out_rele_rip;
1361 } 1368 }
1362 1369
@@ -1368,7 +1375,7 @@ xfs_mountfs(
1368 if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { 1375 if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
1369 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1376 error = xfs_mount_log_sb(mp, mp->m_update_flags);
1370 if (error) { 1377 if (error) {
1371 cmn_err(CE_WARN, "XFS: failed to write sb changes"); 1378 xfs_warn(mp, "failed to write sb changes");
1372 goto out_rtunmount; 1379 goto out_rtunmount;
1373 } 1380 }
1374 } 1381 }
@@ -1389,10 +1396,7 @@ xfs_mountfs(
1389 * quotachecked license. 1396 * quotachecked license.
1390 */ 1397 */
1391 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { 1398 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
1392 cmn_err(CE_NOTE, 1399 xfs_notice(mp, "resetting quota flags");
1393 "XFS: resetting qflags for filesystem %s",
1394 mp->m_fsname);
1395
1396 error = xfs_mount_reset_sbqflags(mp); 1400 error = xfs_mount_reset_sbqflags(mp);
1397 if (error) 1401 if (error)
1398 return error; 1402 return error;
@@ -1406,7 +1410,7 @@ xfs_mountfs(
1406 */ 1410 */
1407 error = xfs_log_mount_finish(mp); 1411 error = xfs_log_mount_finish(mp);
1408 if (error) { 1412 if (error) {
1409 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1413 xfs_warn(mp, "log mount finish failed");
1410 goto out_rtunmount; 1414 goto out_rtunmount;
1411 } 1415 }
1412 1416
@@ -1435,8 +1439,8 @@ xfs_mountfs(
1435 resblks = xfs_default_resblks(mp); 1439 resblks = xfs_default_resblks(mp);
1436 error = xfs_reserve_blocks(mp, &resblks, NULL); 1440 error = xfs_reserve_blocks(mp, &resblks, NULL);
1437 if (error) 1441 if (error)
1438 cmn_err(CE_WARN, "XFS: Unable to allocate reserve " 1442 xfs_warn(mp,
1439 "blocks. Continuing without a reserve pool."); 1443 "Unable to allocate reserve blocks. Continuing without reserve pool.");
1440 } 1444 }
1441 1445
1442 return 0; 1446 return 0;
@@ -1525,12 +1529,12 @@ xfs_unmountfs(
1525 resblks = 0; 1529 resblks = 0;
1526 error = xfs_reserve_blocks(mp, &resblks, NULL); 1530 error = xfs_reserve_blocks(mp, &resblks, NULL);
1527 if (error) 1531 if (error)
1528 cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " 1532 xfs_warn(mp, "Unable to free reserved block pool. "
1529 "Freespace may not be correct on next mount."); 1533 "Freespace may not be correct on next mount.");
1530 1534
1531 error = xfs_log_sbcount(mp, 1); 1535 error = xfs_log_sbcount(mp, 1);
1532 if (error) 1536 if (error)
1533 cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " 1537 xfs_warn(mp, "Unable to update superblock counters. "
1534 "Freespace may not be correct on next mount."); 1538 "Freespace may not be correct on next mount.");
1535 xfs_unmountfs_writesb(mp); 1539 xfs_unmountfs_writesb(mp);
1536 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1540 xfs_unmountfs_wait(mp); /* wait for async bufs */
@@ -2013,10 +2017,8 @@ xfs_dev_is_read_only(
2013 if (xfs_readonly_buftarg(mp->m_ddev_targp) || 2017 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
2014 xfs_readonly_buftarg(mp->m_logdev_targp) || 2018 xfs_readonly_buftarg(mp->m_logdev_targp) ||
2015 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { 2019 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
2016 cmn_err(CE_NOTE, 2020 xfs_notice(mp, "%s required on read-only device.", message);
2017 "XFS: %s required on read-only device.", message); 2021 xfs_notice(mp, "write access unavailable, cannot proceed.");
2018 cmn_err(CE_NOTE,
2019 "XFS: write access unavailable, cannot proceed.");
2020 return EROFS; 2022 return EROFS;
2021 } 2023 }
2022 return 0; 2024 return 0;
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 9bb6eda4cd21..a595f29567fe 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -382,7 +382,8 @@ static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
382 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ 382 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
383 f | XFS_QMOPT_RES_REGBLKS) 383 f | XFS_QMOPT_RES_REGBLKS)
384 384
385extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); 385extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
386 xfs_dqid_t, uint, uint, char *);
386extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 387extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
387 388
388#endif /* __KERNEL__ */ 389#endif /* __KERNEL__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 12a191385310..8f76fdff4f46 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -76,7 +76,7 @@ xfs_growfs_rt_alloc(
76 xfs_mount_t *mp, /* file system mount point */ 76 xfs_mount_t *mp, /* file system mount point */
77 xfs_extlen_t oblocks, /* old count of blocks */ 77 xfs_extlen_t oblocks, /* old count of blocks */
78 xfs_extlen_t nblocks, /* new count of blocks */ 78 xfs_extlen_t nblocks, /* new count of blocks */
79 xfs_ino_t ino) /* inode number (bitmap/summary) */ 79 xfs_inode_t *ip) /* inode (bitmap/summary) */
80{ 80{
81 xfs_fileoff_t bno; /* block number in file */ 81 xfs_fileoff_t bno; /* block number in file */
82 xfs_buf_t *bp; /* temporary buffer for zeroing */ 82 xfs_buf_t *bp; /* temporary buffer for zeroing */
@@ -86,7 +86,6 @@ xfs_growfs_rt_alloc(
86 xfs_fsblock_t firstblock; /* first block allocated in xaction */ 86 xfs_fsblock_t firstblock; /* first block allocated in xaction */
87 xfs_bmap_free_t flist; /* list of freed blocks */ 87 xfs_bmap_free_t flist; /* list of freed blocks */
88 xfs_fsblock_t fsbno; /* filesystem block for bno */ 88 xfs_fsblock_t fsbno; /* filesystem block for bno */
89 xfs_inode_t *ip; /* pointer to incore inode */
90 xfs_bmbt_irec_t map; /* block map output */ 89 xfs_bmbt_irec_t map; /* block map output */
91 int nmap; /* number of block maps */ 90 int nmap; /* number of block maps */
92 int resblks; /* space reservation */ 91 int resblks; /* space reservation */
@@ -112,9 +111,9 @@ xfs_growfs_rt_alloc(
112 /* 111 /*
113 * Lock the inode. 112 * Lock the inode.
114 */ 113 */
115 if ((error = xfs_trans_iget(mp, tp, ino, 0, 114 xfs_ilock(ip, XFS_ILOCK_EXCL);
116 XFS_ILOCK_EXCL, &ip))) 115 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
117 goto error_cancel; 116
118 xfs_bmap_init(&flist, &firstblock); 117 xfs_bmap_init(&flist, &firstblock);
119 /* 118 /*
120 * Allocate blocks to the bitmap file. 119 * Allocate blocks to the bitmap file.
@@ -155,9 +154,8 @@ xfs_growfs_rt_alloc(
155 /* 154 /*
156 * Lock the bitmap inode. 155 * Lock the bitmap inode.
157 */ 156 */
158 if ((error = xfs_trans_iget(mp, tp, ino, 0, 157 xfs_ilock(ip, XFS_ILOCK_EXCL);
159 XFS_ILOCK_EXCL, &ip))) 158 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
160 goto error_cancel;
161 /* 159 /*
162 * Get a buffer for the block. 160 * Get a buffer for the block.
163 */ 161 */
@@ -1854,7 +1852,6 @@ xfs_growfs_rt(
1854 xfs_rtblock_t bmbno; /* bitmap block number */ 1852 xfs_rtblock_t bmbno; /* bitmap block number */
1855 xfs_buf_t *bp; /* temporary buffer */ 1853 xfs_buf_t *bp; /* temporary buffer */
1856 int error; /* error return value */ 1854 int error; /* error return value */
1857 xfs_inode_t *ip; /* bitmap inode, used as lock */
1858 xfs_mount_t *nmp; /* new (fake) mount structure */ 1855 xfs_mount_t *nmp; /* new (fake) mount structure */
1859 xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ 1856 xfs_drfsbno_t nrblocks; /* new number of realtime blocks */
1860 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ 1857 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
@@ -1918,11 +1915,11 @@ xfs_growfs_rt(
1918 /* 1915 /*
1919 * Allocate space to the bitmap and summary files, as necessary. 1916 * Allocate space to the bitmap and summary files, as necessary.
1920 */ 1917 */
1921 if ((error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, 1918 error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
1922 mp->m_sb.sb_rbmino))) 1919 if (error)
1923 return error; 1920 return error;
1924 if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, 1921 error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
1925 mp->m_sb.sb_rsumino))) 1922 if (error)
1926 return error; 1923 return error;
1927 /* 1924 /*
1928 * Allocate a new (fake) mount/sb. 1925 * Allocate a new (fake) mount/sb.
@@ -1972,10 +1969,8 @@ xfs_growfs_rt(
1972 /* 1969 /*
1973 * Lock out other callers by grabbing the bitmap inode lock. 1970 * Lock out other callers by grabbing the bitmap inode lock.
1974 */ 1971 */
1975 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 1972 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
1976 XFS_ILOCK_EXCL, &ip))) 1973 xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
1977 goto error_cancel;
1978 ASSERT(ip == mp->m_rbmip);
1979 /* 1974 /*
1980 * Update the bitmap inode's size. 1975 * Update the bitmap inode's size.
1981 */ 1976 */
@@ -1986,10 +1981,8 @@ xfs_growfs_rt(
1986 /* 1981 /*
1987 * Get the summary inode into the transaction. 1982 * Get the summary inode into the transaction.
1988 */ 1983 */
1989 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 1984 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
1990 XFS_ILOCK_EXCL, &ip))) 1985 xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
1991 goto error_cancel;
1992 ASSERT(ip == mp->m_rsumip);
1993 /* 1986 /*
1994 * Update the summary inode's size. 1987 * Update the summary inode's size.
1995 */ 1988 */
@@ -2075,15 +2068,15 @@ xfs_rtallocate_extent(
2075 xfs_extlen_t prod, /* extent product factor */ 2068 xfs_extlen_t prod, /* extent product factor */
2076 xfs_rtblock_t *rtblock) /* out: start block allocated */ 2069 xfs_rtblock_t *rtblock) /* out: start block allocated */
2077{ 2070{
2071 xfs_mount_t *mp = tp->t_mountp;
2078 int error; /* error value */ 2072 int error; /* error value */
2079 xfs_inode_t *ip; /* inode for bitmap file */
2080 xfs_mount_t *mp; /* file system mount structure */
2081 xfs_rtblock_t r; /* result allocated block */ 2073 xfs_rtblock_t r; /* result allocated block */
2082 xfs_fsblock_t sb; /* summary file block number */ 2074 xfs_fsblock_t sb; /* summary file block number */
2083 xfs_buf_t *sumbp; /* summary file block buffer */ 2075 xfs_buf_t *sumbp; /* summary file block buffer */
2084 2076
2077 ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2085 ASSERT(minlen > 0 && minlen <= maxlen); 2078 ASSERT(minlen > 0 && minlen <= maxlen);
2086 mp = tp->t_mountp; 2079
2087 /* 2080 /*
2088 * If prod is set then figure out what to do to minlen and maxlen. 2081 * If prod is set then figure out what to do to minlen and maxlen.
2089 */ 2082 */
@@ -2099,12 +2092,7 @@ xfs_rtallocate_extent(
2099 return 0; 2092 return 0;
2100 } 2093 }
2101 } 2094 }
2102 /* 2095
2103 * Lock out other callers by grabbing the bitmap inode lock.
2104 */
2105 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2106 XFS_ILOCK_EXCL, &ip)))
2107 return error;
2108 sumbp = NULL; 2096 sumbp = NULL;
2109 /* 2097 /*
2110 * Allocate by size, or near another block, or exactly at some block. 2098 * Allocate by size, or near another block, or exactly at some block.
@@ -2123,11 +2111,12 @@ xfs_rtallocate_extent(
2123 len, &sumbp, &sb, prod, &r); 2111 len, &sumbp, &sb, prod, &r);
2124 break; 2112 break;
2125 default: 2113 default:
2114 error = EIO;
2126 ASSERT(0); 2115 ASSERT(0);
2127 } 2116 }
2128 if (error) { 2117 if (error)
2129 return error; 2118 return error;
2130 } 2119
2131 /* 2120 /*
2132 * If it worked, update the superblock. 2121 * If it worked, update the superblock.
2133 */ 2122 */
@@ -2155,7 +2144,6 @@ xfs_rtfree_extent(
2155 xfs_extlen_t len) /* length of extent freed */ 2144 xfs_extlen_t len) /* length of extent freed */
2156{ 2145{
2157 int error; /* error value */ 2146 int error; /* error value */
2158 xfs_inode_t *ip; /* bitmap file inode */
2159 xfs_mount_t *mp; /* file system mount structure */ 2147 xfs_mount_t *mp; /* file system mount structure */
2160 xfs_fsblock_t sb; /* summary file block number */ 2148 xfs_fsblock_t sb; /* summary file block number */
2161 xfs_buf_t *sumbp; /* summary file block buffer */ 2149 xfs_buf_t *sumbp; /* summary file block buffer */
@@ -2164,9 +2152,9 @@ xfs_rtfree_extent(
2164 /* 2152 /*
2165 * Synchronize by locking the bitmap inode. 2153 * Synchronize by locking the bitmap inode.
2166 */ 2154 */
2167 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 2155 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2168 XFS_ILOCK_EXCL, &ip))) 2156 xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2169 return error; 2157
2170#if defined(__KERNEL__) && defined(DEBUG) 2158#if defined(__KERNEL__) && defined(DEBUG)
2171 /* 2159 /*
2172 * Check to see that this whole range is currently allocated. 2160 * Check to see that this whole range is currently allocated.
@@ -2199,10 +2187,10 @@ xfs_rtfree_extent(
2199 */ 2187 */
2200 if (tp->t_frextents_delta + mp->m_sb.sb_frextents == 2188 if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
2201 mp->m_sb.sb_rextents) { 2189 mp->m_sb.sb_rextents) {
2202 if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) 2190 if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
2203 ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; 2191 mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2204 *(__uint64_t *)&ip->i_d.di_atime = 0; 2192 *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
2205 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2193 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
2206 } 2194 }
2207 return 0; 2195 return 0;
2208} 2196}
@@ -2222,8 +2210,8 @@ xfs_rtmount_init(
2222 if (sbp->sb_rblocks == 0) 2210 if (sbp->sb_rblocks == 0)
2223 return 0; 2211 return 0;
2224 if (mp->m_rtdev_targp == NULL) { 2212 if (mp->m_rtdev_targp == NULL) {
2225 cmn_err(CE_WARN, 2213 xfs_warn(mp,
2226 "XFS: This filesystem has a realtime volume, use rtdev=device option"); 2214 "Filesystem has a realtime volume, use rtdev=device option");
2227 return XFS_ERROR(ENODEV); 2215 return XFS_ERROR(ENODEV);
2228 } 2216 }
2229 mp->m_rsumlevels = sbp->sb_rextslog + 1; 2217 mp->m_rsumlevels = sbp->sb_rextslog + 1;
@@ -2237,7 +2225,7 @@ xfs_rtmount_init(
2237 */ 2225 */
2238 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 2226 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
2239 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { 2227 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) {
2240 cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", 2228 xfs_warn(mp, "realtime mount -- %llu != %llu",
2241 (unsigned long long) XFS_BB_TO_FSB(mp, d), 2229 (unsigned long long) XFS_BB_TO_FSB(mp, d),
2242 (unsigned long long) mp->m_sb.sb_rblocks); 2230 (unsigned long long) mp->m_sb.sb_rblocks);
2243 return XFS_ERROR(EFBIG); 2231 return XFS_ERROR(EFBIG);
@@ -2246,7 +2234,7 @@ xfs_rtmount_init(
2246 d - XFS_FSB_TO_BB(mp, 1), 2234 d - XFS_FSB_TO_BB(mp, 1),
2247 XFS_FSB_TO_B(mp, 1), 0); 2235 XFS_FSB_TO_B(mp, 1), 0);
2248 if (!bp) { 2236 if (!bp) {
2249 cmn_err(CE_WARN, "XFS: realtime device size check failed"); 2237 xfs_warn(mp, "realtime device size check failed");
2250 return EIO; 2238 return EIO;
2251 } 2239 }
2252 xfs_buf_relse(bp); 2240 xfs_buf_relse(bp);
@@ -2306,20 +2294,16 @@ xfs_rtpick_extent(
2306 xfs_rtblock_t *pick) /* result rt extent */ 2294 xfs_rtblock_t *pick) /* result rt extent */
2307{ 2295{
2308 xfs_rtblock_t b; /* result block */ 2296 xfs_rtblock_t b; /* result block */
2309 int error; /* error return value */
2310 xfs_inode_t *ip; /* bitmap incore inode */
2311 int log2; /* log of sequence number */ 2297 int log2; /* log of sequence number */
2312 __uint64_t resid; /* residual after log removed */ 2298 __uint64_t resid; /* residual after log removed */
2313 __uint64_t seq; /* sequence number of file creation */ 2299 __uint64_t seq; /* sequence number of file creation */
2314 __uint64_t *seqp; /* pointer to seqno in inode */ 2300 __uint64_t *seqp; /* pointer to seqno in inode */
2315 2301
2316 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 2302 ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2317 XFS_ILOCK_EXCL, &ip))) 2303
2318 return error; 2304 seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime;
2319 ASSERT(ip == mp->m_rbmip); 2305 if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
2320 seqp = (__uint64_t *)&ip->i_d.di_atime; 2306 mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2321 if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
2322 ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2323 *seqp = 0; 2307 *seqp = 0;
2324 } 2308 }
2325 seq = *seqp; 2309 seq = *seqp;
@@ -2335,7 +2319,7 @@ xfs_rtpick_extent(
2335 b = mp->m_sb.sb_rextents - len; 2319 b = mp->m_sb.sb_rextents - len;
2336 } 2320 }
2337 *seqp = seq + 1; 2321 *seqp = seq + 1;
2338 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2322 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
2339 *pick = b; 2323 *pick = b;
2340 return 0; 2324 return 0;
2341} 2325}
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index ff614c29b441..09e1f4f35e97 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -154,7 +154,7 @@ xfs_rtmount_init(
154 if (mp->m_sb.sb_rblocks == 0) 154 if (mp->m_sb.sb_rblocks == 0)
155 return 0; 155 return 0;
156 156
157 cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); 157 xfs_warn(mp, "Not built with CONFIG_XFS_RT");
158 return ENOSYS; 158 return ENOSYS;
159} 159}
160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 56861d5daaef..d6d6fdfe9422 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -49,9 +49,9 @@ xfs_do_force_shutdown(
49 logerror = flags & SHUTDOWN_LOG_IO_ERROR; 49 logerror = flags & SHUTDOWN_LOG_IO_ERROR;
50 50
51 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 51 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
52 cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " 52 xfs_notice(mp,
53 "line %d of file %s. Return address = 0x%p", 53 "%s(0x%x) called from line %d of file %s. Return address = 0x%p",
54 mp->m_fsname, flags, lnnum, fname, __return_address); 54 __func__, flags, lnnum, fname, __return_address);
55 } 55 }
56 /* 56 /*
57 * No need to duplicate efforts. 57 * No need to duplicate efforts.
@@ -69,30 +69,25 @@ xfs_do_force_shutdown(
69 return; 69 return;
70 70
71 if (flags & SHUTDOWN_CORRUPT_INCORE) { 71 if (flags & SHUTDOWN_CORRUPT_INCORE) {
72 xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, 72 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
73 "Corruption of in-memory data detected. Shutting down filesystem: %s", 73 "Corruption of in-memory data detected. Shutting down filesystem");
74 mp->m_fsname); 74 if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
75 if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
76 xfs_stack_trace(); 75 xfs_stack_trace();
77 }
78 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 76 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
79 if (logerror) { 77 if (logerror) {
80 xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, 78 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
81 "Log I/O Error Detected. Shutting down filesystem: %s", 79 "Log I/O Error Detected. Shutting down filesystem");
82 mp->m_fsname);
83 } else if (flags & SHUTDOWN_DEVICE_REQ) { 80 } else if (flags & SHUTDOWN_DEVICE_REQ) {
84 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, 81 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
85 "All device paths lost. Shutting down filesystem: %s", 82 "All device paths lost. Shutting down filesystem");
86 mp->m_fsname);
87 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { 83 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
88 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, 84 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
89 "I/O Error Detected. Shutting down filesystem: %s", 85 "I/O Error Detected. Shutting down filesystem");
90 mp->m_fsname);
91 } 86 }
92 } 87 }
93 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 88 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
94 cmn_err(CE_ALERT, "Please umount the filesystem, " 89 xfs_alert(mp,
95 "and rectify the problem(s)"); 90 "Please umount the filesystem and rectify the problem(s)");
96 } 91 }
97} 92}
98 93
@@ -106,10 +101,9 @@ xfs_ioerror_alert(
106 xfs_buf_t *bp, 101 xfs_buf_t *bp,
107 xfs_daddr_t blkno) 102 xfs_daddr_t blkno)
108{ 103{
109 cmn_err(CE_ALERT, 104 xfs_alert(mp,
110 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" 105 "I/O error occurred: meta-data dev %s block 0x%llx"
111 " (\"%s\") error %d buf count %zd", 106 " (\"%s\") error %d buf count %zd",
112 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
113 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), 107 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
114 (__uint64_t)blkno, func, 108 (__uint64_t)blkno, func,
115 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); 109 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
@@ -173,17 +167,9 @@ xfs_extlen_t
173xfs_get_extsz_hint( 167xfs_get_extsz_hint(
174 struct xfs_inode *ip) 168 struct xfs_inode *ip)
175{ 169{
176 xfs_extlen_t extsz; 170 if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
177 171 return ip->i_d.di_extsize;
178 if (unlikely(XFS_IS_REALTIME_INODE(ip))) { 172 if (XFS_IS_REALTIME_INODE(ip))
179 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) 173 return ip->i_mount->m_sb.sb_rextsize;
180 ? ip->i_d.di_extsize 174 return 0;
181 : ip->i_mount->m_sb.sb_rextsize;
182 ASSERT(extsz);
183 } else {
184 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
185 ? ip->i_d.di_extsize : 0;
186 }
187
188 return extsz;
189} 175}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c2042b736b81..06a9759b6352 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -469,8 +469,6 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
469void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); 469void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
470void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); 470void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
472int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
473 xfs_ino_t , uint, uint, struct xfs_inode **);
474void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); 472void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
475void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); 473void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
476void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); 474void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index c5bbbc45db91..12aff9584e29 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -563,7 +563,7 @@ xfs_trans_ail_delete_bulk(
563 563
564 spin_unlock(&ailp->xa_lock); 564 spin_unlock(&ailp->xa_lock);
565 if (!XFS_FORCED_SHUTDOWN(mp)) { 565 if (!XFS_FORCED_SHUTDOWN(mp)) {
566 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 566 xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
567 "%s: attempting to delete a log item that is not in the AIL", 567 "%s: attempting to delete a log item that is not in the AIL",
568 __func__); 568 __func__);
569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c47918c302a5..03b3b7f85a3b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -305,7 +305,7 @@ xfs_trans_read_buf(
305 if (xfs_error_target == target) { 305 if (xfs_error_target == target) {
306 if (((xfs_req_num++) % xfs_error_mod) == 0) { 306 if (((xfs_req_num++) % xfs_error_mod) == 0) {
307 xfs_buf_relse(bp); 307 xfs_buf_relse(bp);
308 cmn_err(CE_DEBUG, "Returning error!\n"); 308 xfs_debug(mp, "Returning error!");
309 return XFS_ERROR(EIO); 309 return XFS_ERROR(EIO);
310 } 310 }
311 } 311 }
@@ -383,7 +383,8 @@ xfs_trans_read_buf(
383 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); 383 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
384 if (bp == NULL) { 384 if (bp == NULL) {
385 *bpp = NULL; 385 *bpp = NULL;
386 return 0; 386 return (flags & XBF_TRYLOCK) ?
387 0 : XFS_ERROR(ENOMEM);
387 } 388 }
388 if (XFS_BUF_GETERROR(bp) != 0) { 389 if (XFS_BUF_GETERROR(bp) != 0) {
389 XFS_BUF_SUPER_STALE(bp); 390 XFS_BUF_SUPER_STALE(bp);
@@ -403,7 +404,7 @@ xfs_trans_read_buf(
403 xfs_force_shutdown(tp->t_mountp, 404 xfs_force_shutdown(tp->t_mountp,
404 SHUTDOWN_META_IO_ERROR); 405 SHUTDOWN_META_IO_ERROR);
405 xfs_buf_relse(bp); 406 xfs_buf_relse(bp);
406 cmn_err(CE_DEBUG, "Returning trans error!\n"); 407 xfs_debug(mp, "Returning trans error!");
407 return XFS_ERROR(EIO); 408 return XFS_ERROR(EIO);
408 } 409 }
409 } 410 }
@@ -427,7 +428,7 @@ shutdown_abort:
427 */ 428 */
428#if defined(DEBUG) 429#if defined(DEBUG)
429 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) 430 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
430 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); 431 xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
431#endif 432#endif
432 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != 433 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
433 (XBF_STALE|XBF_DELWRI)); 434 (XBF_STALE|XBF_DELWRI));
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index ccb34532768b..16084d8ea231 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -44,28 +44,6 @@ xfs_trans_inode_broot_debug(
44#endif 44#endif
45 45
46/* 46/*
47 * Get an inode and join it to the transaction.
48 */
49int
50xfs_trans_iget(
51 xfs_mount_t *mp,
52 xfs_trans_t *tp,
53 xfs_ino_t ino,
54 uint flags,
55 uint lock_flags,
56 xfs_inode_t **ipp)
57{
58 int error;
59
60 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
61 if (!error && tp) {
62 xfs_trans_ijoin(tp, *ipp);
63 (*ipp)->i_itemp->ili_lock_flags = lock_flags;
64 }
65 return error;
66}
67
68/*
69 * Add a locked inode to the transaction. 47 * Add a locked inode to the transaction.
70 * 48 *
71 * The inode must be locked, and it cannot be associated with any transaction. 49 * The inode must be locked, and it cannot be associated with any transaction.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d8e6f8cd6f0c..c48b4217ec47 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1189,9 +1189,8 @@ xfs_inactive(
1189 * inode might be lost for a long time or forever. 1189 * inode might be lost for a long time or forever.
1190 */ 1190 */
1191 if (!XFS_FORCED_SHUTDOWN(mp)) { 1191 if (!XFS_FORCED_SHUTDOWN(mp)) {
1192 cmn_err(CE_NOTE, 1192 xfs_notice(mp, "%s: xfs_ifree returned error %d",
1193 "xfs_inactive: xfs_ifree() returned an error = %d on %s", 1193 __func__, error);
1194 error, mp->m_fsname);
1195 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1194 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1196 } 1195 }
1197 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 1196 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
@@ -1208,12 +1207,12 @@ xfs_inactive(
1208 */ 1207 */
1209 error = xfs_bmap_finish(&tp, &free_list, &committed); 1208 error = xfs_bmap_finish(&tp, &free_list, &committed);
1210 if (error) 1209 if (error)
1211 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1210 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
1212 "xfs_bmap_finish() returned error %d", error); 1211 __func__, error);
1213 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1212 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1214 if (error) 1213 if (error)
1215 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1214 xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
1216 "xfs_trans_commit() returned error %d", error); 1215 __func__, error);
1217 } 1216 }
1218 1217
1219 /* 1218 /*
@@ -1310,7 +1309,7 @@ xfs_create(
1310 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, 1309 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1311 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 1310 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1312 if (error) 1311 if (error)
1313 goto std_return; 1312 return error;
1314 1313
1315 if (is_dir) { 1314 if (is_dir) {
1316 rdev = 0; 1315 rdev = 0;
@@ -1390,12 +1389,6 @@ xfs_create(
1390 } 1389 }
1391 1390
1392 /* 1391 /*
1393 * At this point, we've gotten a newly allocated inode.
1394 * It is locked (and joined to the transaction).
1395 */
1396 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1397
1398 /*
1399 * Now we join the directory inode to the transaction. We do not do it 1392 * Now we join the directory inode to the transaction. We do not do it
1400 * earlier because xfs_dir_ialloc might commit the previous transaction 1393 * earlier because xfs_dir_ialloc might commit the previous transaction
1401 * (and release all the locks). An error from here on will result in 1394 * (and release all the locks). An error from here on will result in
@@ -1440,22 +1433,13 @@ xfs_create(
1440 */ 1433 */
1441 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); 1434 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1442 1435
1443 /*
1444 * xfs_trans_commit normally decrements the vnode ref count
1445 * when it unlocks the inode. Since we want to return the
1446 * vnode to the caller, we bump the vnode ref count now.
1447 */
1448 IHOLD(ip);
1449
1450 error = xfs_bmap_finish(&tp, &free_list, &committed); 1436 error = xfs_bmap_finish(&tp, &free_list, &committed);
1451 if (error) 1437 if (error)
1452 goto out_abort_rele; 1438 goto out_bmap_cancel;
1453 1439
1454 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1440 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1455 if (error) { 1441 if (error)
1456 IRELE(ip); 1442 goto out_release_inode;
1457 goto out_dqrele;
1458 }
1459 1443
1460 xfs_qm_dqrele(udqp); 1444 xfs_qm_dqrele(udqp);
1461 xfs_qm_dqrele(gdqp); 1445 xfs_qm_dqrele(gdqp);
@@ -1469,27 +1453,21 @@ xfs_create(
1469 cancel_flags |= XFS_TRANS_ABORT; 1453 cancel_flags |= XFS_TRANS_ABORT;
1470 out_trans_cancel: 1454 out_trans_cancel:
1471 xfs_trans_cancel(tp, cancel_flags); 1455 xfs_trans_cancel(tp, cancel_flags);
1472 out_dqrele: 1456 out_release_inode:
1457 /*
1458 * Wait until after the current transaction is aborted to
1459 * release the inode. This prevents recursive transactions
1460 * and deadlocks from xfs_inactive.
1461 */
1462 if (ip)
1463 IRELE(ip);
1464
1473 xfs_qm_dqrele(udqp); 1465 xfs_qm_dqrele(udqp);
1474 xfs_qm_dqrele(gdqp); 1466 xfs_qm_dqrele(gdqp);
1475 1467
1476 if (unlock_dp_on_error) 1468 if (unlock_dp_on_error)
1477 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1469 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1478 std_return:
1479 return error; 1470 return error;
1480
1481 out_abort_rele:
1482 /*
1483 * Wait until after the current transaction is aborted to
1484 * release the inode. This prevents recursive transactions
1485 * and deadlocks from xfs_inactive.
1486 */
1487 xfs_bmap_cancel(&free_list);
1488 cancel_flags |= XFS_TRANS_ABORT;
1489 xfs_trans_cancel(tp, cancel_flags);
1490 IRELE(ip);
1491 unlock_dp_on_error = B_FALSE;
1492 goto out_dqrele;
1493} 1471}
1494 1472
1495#ifdef DEBUG 1473#ifdef DEBUG
@@ -2114,9 +2092,8 @@ xfs_symlink(
2114 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 2092 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
2115 &first_block, resblks, mval, &nmaps, 2093 &first_block, resblks, mval, &nmaps,
2116 &free_list); 2094 &free_list);
2117 if (error) { 2095 if (error)
2118 goto error1; 2096 goto error2;
2119 }
2120 2097
2121 if (resblks) 2098 if (resblks)
2122 resblks -= fs_blocks; 2099 resblks -= fs_blocks;
@@ -2148,7 +2125,7 @@ xfs_symlink(
2148 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 2125 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
2149 &first_block, &free_list, resblks); 2126 &first_block, &free_list, resblks);
2150 if (error) 2127 if (error)
2151 goto error1; 2128 goto error2;
2152 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2129 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2153 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2130 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2154 2131
@@ -2161,13 +2138,6 @@ xfs_symlink(
2161 xfs_trans_set_sync(tp); 2138 xfs_trans_set_sync(tp);
2162 } 2139 }
2163 2140
2164 /*
2165 * xfs_trans_commit normally decrements the vnode ref count
2166 * when it unlocks the inode. Since we want to return the
2167 * vnode to the caller, we bump the vnode ref count now.
2168 */
2169 IHOLD(ip);
2170
2171 error = xfs_bmap_finish(&tp, &free_list, &committed); 2141 error = xfs_bmap_finish(&tp, &free_list, &committed);
2172 if (error) { 2142 if (error) {
2173 goto error2; 2143 goto error2;
@@ -2861,7 +2831,8 @@ xfs_change_file_space(
2861 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 2831 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
2862 2832
2863 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2833 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2864 xfs_trans_set_sync(tp); 2834 if (attr_flags & XFS_ATTR_SYNC)
2835 xfs_trans_set_sync(tp);
2865 2836
2866 error = xfs_trans_commit(tp, 0); 2837 error = xfs_trans_commit(tp, 0);
2867 2838
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index f6702927eee4..3bcd23353d6c 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
18#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ 18#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
19#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ 19#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
20#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ 20#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
21#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */
21 22
22int xfs_readlink(struct xfs_inode *ip, char *link); 23int xfs_readlink(struct xfs_inode *ip, char *link);
23int xfs_release(struct xfs_inode *ip); 24int xfs_release(struct xfs_inode *ip);